Skip to content

Files

Latest commit

 

History

History
1964 lines (1956 loc) · 1.89 MB

tasks.md

File metadata and controls

1964 lines (1956 loc) · 1.89 MB

Available tasks

The following tables give you an overview of the tasks in MTEB.

Name Languages Type Category Domains # Samples Dataset statistics
AFQMC ['cmn'] STS s2s None None
AILACasedocs ['eng'] Retrieval p2p [Legal, Written] None None
AILAStatutes ['eng'] Retrieval p2p [Legal, Written] None None
AJGT (Alomari et al., 2017) ['ara'] Classification s2s [Social, Written] None None
ARCChallenge (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
AROCocoOrder (Yuksekgonul et al., 2023) ['eng'] Compositionality i2t [Encyclopaedic] {'test': 25010} {'test': {'num_samples': 25010, 'num_images': 25010, 'num_texts': 125050, 'num_unique_texts': 119661, 'min_text_length': 26, 'average_text_length': 51.52, 'max_text_length': 191}}
AROFlickrOrder (Yuksekgonul et al., 2023) ['eng'] Compositionality i2t [Encyclopaedic] {'test': 5000} {'test': {'num_samples': 5000, 'num_images': 5000, 'num_texts': 25000, 'num_unique_texts': 23892, 'min_text_length': 11, 'average_text_length': 62.37, 'max_text_length': 185}}
AROVisualAttribution (Yuksekgonul et al., 2023) ['eng'] Compositionality i2t [Encyclopaedic] {'test': 28748} {'test': {'num_samples': 28748, 'num_images': 28748, 'num_texts': 57496, 'num_unique_texts': 52146, 'min_text_length': 27, 'average_text_length': 35.98, 'max_text_length': 61}}
AROVisualRelation (Yuksekgonul et al., 2023) ['eng'] Compositionality i2t [Encyclopaedic] {'test': 23937} {'test': {'num_samples': 23937, 'num_images': 23937, 'num_texts': 47874, 'num_unique_texts': 26706, 'min_text_length': 21, 'average_text_length': 34.83, 'max_text_length': 57}}
ATEC ['cmn'] STS s2s None None
AfriSentiClassification ['amh', 'arq', 'ary', 'hau', 'ibo', 'kin', 'pcm', 'por', 'swa', 'tso', 'twi', 'yor'] Classification s2s [Social, Written] None None
AfriSentiLangClassification ['amh', 'arq', 'ary', 'hau', 'ibo', 'kin', 'pcm', 'por', 'swa', 'tso', 'twi', 'yor'] Classification s2s [Social, Written] None None
AllegroReviews ['pol'] Classification s2s None None
AlloProfClusteringP2P.v2 (Lefebvre-Brossard et al., 2023) ['fra'] Clustering p2p [Encyclopaedic, Written] None None
AlloProfClusteringS2S.v2 (Lefebvre-Brossard et al., 2023) ['fra'] Clustering s2s [Encyclopaedic, Written] None None
AlloprofReranking (Lefebvre-Brossard et al., 2023) ['fra'] Reranking s2p [Academic, Web, Written] None None
AlloprofRetrieval (Lefebvre-Brossard et al., 2023) ['fra'] Retrieval s2p [Encyclopaedic, Written] None None
AlphaNLI (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
AmazonCounterfactualClassification ['deu', 'eng', 'jpn'] Classification s2s [Reviews, Written] None None
AmazonPolarityClassification (Julian McAuley, 2013) ['eng'] Classification p2p [Reviews, Written] None None
AmazonReviewsClassification (Phillip Keung, 2020) ['cmn', 'deu', 'eng', 'fra', 'jpn', 'spa'] Classification s2s [Reviews, Written] None None
AngryTweetsClassification (Pauli et al., 2021) ['dan'] Classification s2s [Social, Written] None None
AppsRetrieval (Dan Hendrycks, 2021) ['eng', 'python'] Retrieval p2p [Programming, Written] {'test': 12530} {'test': {'number_of_characters': 11335620, 'num_samples': 12530, 'num_queries': 3765, 'num_documents': 8765, 'min_document_length': 152, 'average_document_length': 717.27, 'max_document_length': 5742, 'unique_documents': 8765, 'min_query_length': 6, 'average_query_length': 1340.96, 'max_query_length': 289049, 'unique_queries': 3765, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3765}}
ArEntail (Obeidat et al., 2024) ['ara'] PairClassification s2s [News, Written] None None
ArXivHierarchicalClusteringP2P ['eng'] Clustering p2p [Academic, Written] {'test': 2048} {'test': {'num_samples': 2048, 'number_of_characters': 2065284, 'min_text_length': 103, 'average_text_length': 1008.44, 'max_text_length': 2103, 'min_labels_per_text': 1, 'average_labels_per_text': 1.46, 'max_labels_per_text': 381, 'unique_labels': 129, 'labels': {'cs': {'count': 356}, 'math': {'count': 381}, 'OC': {'count': 11}, 'hep-lat': {'count': 13}, 'hep': {'count': 98}, 'astro-ph': {'count': 213}, 'eess': {'count': 76}, 'quant-ph': {'count': 135}, 'DC': {'count': 5}, 'cond-mat': {'count': 274}, 'hep-th': {'count': 66}, 'SP': {'count': 33}, 'hep-ph': {'count': 69}, 'FA': {'count': 6}, 'nucl-th': {'count': 17}, 'q-bio': {'count': 80}, 'HE': {'count': 22}, 'HC': {'count': 2}, 'stat': {'count': 60}, 'ML': {'count': 16}, 'IV': {'count': 13}, 'stat-mech': {'count': 47}, 'DS': {'count': 14}, 'ME': {'count': 12}, 'CC': {'count': 2}, 'mtrl-sci': {'count': 22}, 'PE': {'count': 16}, 'NT': {'count': 11}, 'SC': {'count': 6}, 'AG': {'count': 13}, 'physics': {'count': 81}, 'ins-det': {'count': 9}, 'GA': {'count': 18}, 'BM': {'count': 6}, 'GN': {'count': 17}, 'NA': {'count': 15}, 'app-ph': {'count': 7}, 'RT': {'count': 6}, 'other': {'count': 37}, 'soft': {'count': 15}, 'CO': {'count': 33}, 'supr-con': {'count': 21}, 'chem-ph': {'count': 3}, 'DM': {'count': 2}, 'MN': {'count': 12}, 'q-fin': {'count': 27}, 'PM': {'count': 2}, 'AP': {'count': 27}, 'gr-qc': {'count': 15}, 'quant-gas': {'count': 8}, 'mes-hall': {'count': 33}, 'IT': {'count': 19}, 'SI': {'count': 6}, 'SG': {'count': 3}, 'bio-ph': {'count': 2}, 'SR': {'count': 16}, 'soc-ph': {'count': 5}, 'hep-ex': {'count': 15}, 'DG': {'count': 11}, 'NE': {'count': 5}, 'CR': {'count': 6}, 'CL': {'count': 12}, 'RM': {'count': 3}, 'econ': {'count': 17}, 'nlin': {'count': 5}, 'PS': {'count': 1}, 'LG': {'count': 26}, 'QA': {'count': 9}, 'str-el': {'count': 26}, 'CV': {'count': 34}, 'MF': {'count': 6}, 'IM': {'count': 7}, 'EM': {'count': 6}, 'TH': {'count': 5}, 'PR': {'count': 20}, 'AT': {'count': 4}, 'OA': {'count': 4}, 'CP': {'count': 6}, 'LO': {'count': 14}, 'flu-dyn': {'count': 6}, 'atom-ph': {'count': 8}, 'class-ph': {'count': 1}, 'SY': {'count': 20}, 'IR': {'count': 1}, 'plasm-ph': {'count': 8}, 'CE': {'count': 2}, 'AO': {'count': 1}, 'comp-ph': {'count': 3}, 'optics': {'count': 12}, 'MG': {'count': 4}, 'ST': {'count': 6}, 'nucl-ex': {'count': 6}, 'CY': {'count': 9}, 'ao-ph': {'count': 2}, 'DB': {'count': 1}, 'math-ph': {'count': 10}, 'NC': {'count': 13}, 'GT': {'count': 11}, 'TO': {'count': 2}, 'AI': {'count': 9}, 'NI': {'count': 2}, 'gen-ph': {'count': 4}, 'OT': {'count': 4}, 'SD': {'count': 2}, 'dis-nn': {'count': 4}, 'RO': {'count': 7}, 'CA': {'count': 6}, 'FL': {'count': 1}, 'SE': {'count': 5}, 'EP': {'count': 9}, 'hist-ph': {'count': 1}, 'QM': {'count': 9}, 'ed-ph': {'count': 2}, 'GR': {'count': 4}, 'MS': {'count': 1}, 'CD': {'count': 1}, 'ET': {'count': 1}, 'acc-ph': {'count': 5}, 'AC': {'count': 2}, 'OH': {'count': 1}, 'EC': {'count': 2}, 'DL': {'count': 1}, 'AS': {'count': 3}, 'geo-ph': {'count': 2}, 'CG': {'count': 3}, 'CB': {'count': 1}, 'AR': {'count': 1}, 'TR': {'count': 1}, 'atm-clus': {'count': 1}}}}
ArXivHierarchicalClusteringS2S ['eng'] Clustering p2p [Academic, Written] None None
ArguAna (Boteva et al., 2016) ['eng'] Retrieval s2p [Medical, Written] None None
ArguAna-Fa ['fas'] Retrieval s2p [Blog] None None
ArguAna-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
ArguAna-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p [Medical, Written] None None
ArmenianParaphrasePC (Arthur Malajyan, 2020) ['hye'] PairClassification s2s [News, Written] None None
ArxivClassification (He et al., 2019) ['eng'] Classification s2s [Academic, Written] None None
AskUbuntuDupQuestions ['eng'] Reranking s2s [Programming, Web] {'test': 375} {'test': {'num_samples': 375, 'number_of_characters': 413674, 'num_positive': 2255, 'num_negative': 5245, 'min_query_length': 17, 'avg_query_length': 50.21, 'max_query_length': 148, 'unique_query': 374, 'min_positive_length': 15, 'avg_positive_length': 52.54, 'max_positive_length': 152, 'unique_positive': 2165, 'min_negative_length': 15, 'avg_negative_length': 52.69, 'max_negative_length': 148, 'unique_negative': 5002}}
Assin2RTE (Real et al., 2020) ['por'] PairClassification s2s [Written] None None
Assin2STS (Real et al., 2020) ['por'] STS s2s [Written] None None
AutoRAGRetrieval (Dongkyu Kim, 2024) ['kor'] Retrieval s2p [Financial, Government, Legal, Medical, Social] {'test': 834} {'test': {'number_of_characters': 894.22, 'num_samples': 834, 'num_queries': 114, 'num_documents': 720, 'average_document_length': 1.15, 'average_query_length': 0.61, 'average_relevant_docs_per_query': 1.0}}
BIOSSES (Soğancıoğlu et al., 2017) ['eng'] STS s2s [Medical] None None
BLINKIT2IMultiChoice (Fu et al., 2024) ['eng'] VisionCentric it2i [Encyclopaedic] {'test': 1206} {'test': {'number_of_characters': 21204, 'num_samples': 1206, 'num_queries': 402, 'num_documents': 804, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 83, 'average_document_image_width': 788.44, 'max_document_image_width': 5087, 'min_document_image_height': 127, 'average_document_image_height': 813.95, 'max_document_image_height': 3230, 'num_document_images': 804, 'min_query_length': 51, 'average_query_length': 52.75, 'max_query_length': 57, 'unique_queries': 3, 'num_query_images': 402, 'min_query_image_width': 166, 'average_query_image_width': 815.13, 'max_query_image_width': 2733, 'min_query_image_height': 254, 'average_query_image_height': 875.38, 'max_query_image_height': 5687, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 804}}
BLINKIT2IRetrieval (Fu et al., 2024) ['eng'] Any2AnyRetrieval it2i [Encyclopaedic] {'test': 1206} {'test': {'number_of_characters': 21204, 'num_samples': 1206, 'num_queries': 402, 'num_documents': 804, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 804, 'min_query_length': 51, 'average_query_length': 52.75, 'max_query_length': 57, 'unique_queries': 3, 'num_query_images': 402, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 402}}
BLINKIT2TMultiChoice (Fu et al., 2024) ['eng'] VisionCentric it2t [Encyclopaedic] {'test': 813} {'test': {'number_of_characters': 54272, 'num_samples': 813, 'num_queries': 793, 'num_documents': 20, 'min_document_length': 1, 'average_document_length': 5.8, 'max_document_length': 14, 'unique_documents': 20, 'min_document_image_width': 0, 'average_document_image_width': 0, 'max_document_image_width': 0, 'min_document_image_height': 0, 'average_document_image_height': 0, 'max_document_image_height': 0, 'num_document_images': 0, 'min_query_length': 22, 'average_query_length': 68.29, 'max_query_length': 135, 'unique_queries': 347, 'num_query_images': 793, 'min_query_image_width': 63, 'average_query_image_width': 515.08, 'max_query_image_width': 4096, 'min_query_image_height': 232, 'average_query_image_height': 722.64, 'max_query_image_height': 3226, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 20}}
BLINKIT2TRetrieval (Fu et al., 2024) ['eng'] Any2AnyRetrieval it2t [Encyclopaedic] {'test': 813} {'test': {'number_of_characters': 54272, 'num_samples': 813, 'num_queries': 793, 'num_documents': 20, 'min_document_length': 1, 'average_document_length': 5.8, 'max_document_length': 14, 'unique_documents': 20, 'num_document_images': 0, 'min_query_length': 22, 'average_query_length': 68.29, 'max_query_length': 135, 'unique_queries': 347, 'num_query_images': 793, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 20}}
BQ (Shitao Xiao, 2024) ['cmn'] STS s2s None None
BSARDRetrieval (Louis et al., 2022) ['fra'] Retrieval s2p [Legal, Spoken] None None
BUCC.v2 ['cmn', 'deu', 'eng', 'fra', 'rus'] BitextMining s2s [Written] {'test': 35000} {'test': {'num_samples': 35000, 'number_of_characters': 6640032, 'unique_pairs': 34978, 'min_sentence1_length': 16, 'average_sentence1_length': 99.11, 'max_sentence1_length': 204, 'unique_sentence1': 34978, 'min_sentence2_length': 42, 'average_sentence2_length': 90.61, 'max_sentence2_length': 159, 'unique_sentence2': 25306, 'hf_subset_descriptive_stats': {'de-en': {'num_samples': 9580, 'number_of_characters': 1919197, 'unique_pairs': 9573, 'min_sentence1_length': 50, 'average_sentence1_length': 109.08, 'max_sentence1_length': 204, 'unique_sentence1': 9573, 'min_sentence2_length': 46, 'average_sentence2_length': 91.25, 'max_sentence2_length': 155, 'unique_sentence2': 9570}, 'fr-en': {'num_samples': 9086, 'number_of_characters': 1677545, 'unique_pairs': 9081, 'min_sentence1_length': 43, 'average_sentence1_length': 99.32, 'max_sentence1_length': 174, 'unique_sentence1': 9081, 'min_sentence2_length': 42, 'average_sentence2_length': 85.31, 'max_sentence2_length': 159, 'unique_sentence2': 9076}, 'ru-en': {'num_samples': 14435, 'number_of_characters': 2808206, 'unique_pairs': 14425, 'min_sentence1_length': 40, 'average_sentence1_length': 101.66, 'max_sentence1_length': 186, 'unique_sentence1': 14425, 'min_sentence2_length': 45, 'average_sentence2_length': 92.88, 'max_sentence2_length': 159, 'unique_sentence2': 14424}, 'zh-en': {'num_samples': 1899, 'number_of_characters': 235084, 'unique_pairs': 1899, 'min_sentence1_length': 16, 'average_sentence1_length': 28.43, 'max_sentence1_length': 40, 'unique_sentence1': 1899, 'min_sentence2_length': 48, 'average_sentence2_length': 95.36, 'max_sentence2_length': 159, 'unique_sentence2': 1899}}}}
Banking77Classification ['eng'] Classification s2s [Written] None None
BelebeleRetrieval (Lucas Bandarkar, 2023) ['acm', 'afr', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'azj', 'bam', 'ben', 'bod', 'bul', 'cat', 'ceb', 'ces', 'ckb', 'dan', 'deu', 'ell', 'eng', 'est', 'eus', 'fin', 'fra', 'fuv', 'gaz', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kac', 'kan', 'kat', 'kaz', 'kea', 'khk', 'khm', 'kin', 'kir', 'kor', 'lao', 'lin', 'lit', 'lug', 'luo', 'lvs', 'mal', 'mar', 'mkd', 'mlt', 'mri', 'mya', 'nld', 'nob', 'npi', 'nso', 'nya', 'ory', 'pan', 'pbt', 'pes', 'plt', 'pol', 'por', 'ron', 'rus', 'shn', 'sin', 'slk', 'slv', 'sna', 'snd', 'som', 'sot', 'spa', 'srp', 'ssw', 'sun', 'swe', 'swh', 'tam', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tsn', 'tso', 'tur', 'ukr', 'urd', 'uzn', 'vie', 'war', 'wol', 'xho', 'yor', 'zho', 'zsm', 'zul'] Retrieval s2p [News, Web, Written] {'test': 521866} {'test': {'number_of_characters': 25574620, 'num_samples': 521866, 'num_queries': 338378, 'num_documents': 183488, 'min_document_length': 4, 'average_document_length': 137.38, 'max_document_length': 237, 'unique_documents': 183488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 338378, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 183488, 'hf_subset_descriptive_stats': {'acm_Arab-acm_Arab': {'number_of_characters': 51232, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 102.98, 'max_document_length': 129, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'acm_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-acm_Arab': {'number_of_characters': 51232, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 102.98, 'max_document_length': 129, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'afr_Latn-afr_Latn': {'number_of_characters': 71217, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 143.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'afr_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-afr_Latn': {'number_of_characters': 71217, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 143.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'als_Latn-als_Latn': {'number_of_characters': 69498, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 140.41, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'als_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-als_Latn': {'number_of_characters': 69498, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 140.41, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'amh_Ethi-amh_Ethi': {'number_of_characters': 45221, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 90.67, 'max_document_length': 100, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'amh_Ethi-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-amh_Ethi': {'number_of_characters': 45221, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 90.67, 'max_document_length': 100, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'apc_Arab-apc_Arab': {'number_of_characters': 51248, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 103.02, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'apc_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-apc_Arab': {'number_of_characters': 51248, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 103.02, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ars_Arab-ars_Arab': {'number_of_characters': 51765, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 104.08, 'max_document_length': 119, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ars_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ars_Arab': {'number_of_characters': 51765, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 104.08, 'max_document_length': 119, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ary_Arab-ary_Arab': {'number_of_characters': 60261, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 121.49, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ary_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ary_Arab': {'number_of_characters': 60261, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 121.49, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arz_Arab-arz_Arab': {'number_of_characters': 52403, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 105.38, 'max_document_length': 115, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arz_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-arz_Arab': {'number_of_characters': 52403, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 105.38, 'max_document_length': 115, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'asm_Beng-asm_Beng': {'number_of_characters': 62410, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 4, 'average_document_length': 125.89, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'asm_Beng-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-asm_Beng': {'number_of_characters': 62410, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 4, 'average_document_length': 125.89, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'azj_Latn-azj_Latn': {'number_of_characters': 67137, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.58, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'azj_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-azj_Latn': {'number_of_characters': 67137, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.58, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bam_Latn-bam_Latn': {'number_of_characters': 66084, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 133.42, 'max_document_length': 166, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bam_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bam_Latn': {'number_of_characters': 66084, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 133.42, 'max_document_length': 166, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bod_Tibt-bod_Tibt': {'number_of_characters': 79188, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.27, 'max_document_length': 213, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bod_Tibt-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bod_Tibt': {'number_of_characters': 79188, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.27, 'max_document_length': 213, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bul_Cyrl-bul_Cyrl': {'number_of_characters': 66577, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.43, 'max_document_length': 177, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'bul_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-bul_Cyrl': {'number_of_characters': 66577, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.43, 'max_document_length': 177, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'cat_Latn-cat_Latn': {'number_of_characters': 68842, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.07, 'max_document_length': 163, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'cat_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-cat_Latn': {'number_of_characters': 68842, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.07, 'max_document_length': 163, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ceb_Latn-ceb_Latn': {'number_of_characters': 74053, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 149.75, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ceb_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ceb_Latn': {'number_of_characters': 74053, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 149.75, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ces_Latn-ces_Latn': {'number_of_characters': 61936, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 124.92, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ces_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ces_Latn': {'number_of_characters': 61936, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 124.92, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ckb_Arab-ckb_Arab': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 131.03, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ckb_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ckb_Arab': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 131.03, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'dan_Latn-dan_Latn': {'number_of_characters': 66648, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.57, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'dan_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-dan_Latn': {'number_of_characters': 66648, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 134.57, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'deu_Latn-deu_Latn': {'number_of_characters': 68768, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 138.92, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'deu_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-deu_Latn': {'number_of_characters': 68768, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 138.92, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ell_Grek-ell_Grek': {'number_of_characters': 79210, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.32, 'max_document_length': 212, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ell_Grek-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ell_Grek': {'number_of_characters': 79210, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 160.32, 'max_document_length': 212, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'est_Latn-est_Latn': {'number_of_characters': 61779, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.6, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'est_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-est_Latn': {'number_of_characters': 61779, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.6, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eus_Latn-eus_Latn': {'number_of_characters': 67979, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 137.3, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eus_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-eus_Latn': {'number_of_characters': 67979, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 137.3, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fin_Latn-fin_Latn': {'number_of_characters': 66234, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fin_Latn': {'number_of_characters': 66234, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fra_Latn-fra_Latn': {'number_of_characters': 82464, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 166.98, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fra_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fra_Latn': {'number_of_characters': 82464, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 166.98, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fuv_Latn-fuv_Latn': {'number_of_characters': 53555, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 107.74, 'max_document_length': 122, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'fuv_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-fuv_Latn': {'number_of_characters': 53555, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 107.74, 'max_document_length': 122, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'gaz_Latn-gaz_Latn': {'number_of_characters': 78315, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 158.48, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'gaz_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-gaz_Latn': {'number_of_characters': 78315, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 158.48, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'grn_Latn-grn_Latn': {'number_of_characters': 68572, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 138.52, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'grn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-grn_Latn': {'number_of_characters': 68572, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 138.52, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'guj_Gujr-guj_Gujr': {'number_of_characters': 57007, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 114.82, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'guj_Gujr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-guj_Gujr': {'number_of_characters': 57007, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 114.82, 'max_document_length': 138, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hat_Latn-hat_Latn': {'number_of_characters': 64558, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.29, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hat_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hat_Latn': {'number_of_characters': 64558, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.29, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hau_Latn-hau_Latn': {'number_of_characters': 78240, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.33, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hau_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hau_Latn': {'number_of_characters': 78240, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.33, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'heb_Hebr-heb_Hebr': {'number_of_characters': 50598, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 101.68, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'heb_Hebr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-heb_Hebr': {'number_of_characters': 50598, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 101.68, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hrv_Latn-hrv_Latn': {'number_of_characters': 62928, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.95, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hrv_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hrv_Latn': {'number_of_characters': 62928, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.95, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hun_Latn-hun_Latn': {'number_of_characters': 67941, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 137.22, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hun_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hun_Latn': {'number_of_characters': 67941, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 137.22, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hye_Armn-hye_Armn': {'number_of_characters': 68859, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.1, 'max_document_length': 193, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hye_Armn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-hye_Armn': {'number_of_characters': 68859, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 139.1, 'max_document_length': 193, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ibo_Latn-ibo_Latn': {'number_of_characters': 66167, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 133.59, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ibo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ibo_Latn': {'number_of_characters': 66167, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 133.59, 'max_document_length': 156, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ilo_Latn-ilo_Latn': {'number_of_characters': 78161, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.17, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ilo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ilo_Latn': {'number_of_characters': 78161, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 158.17, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ind_Latn-ind_Latn': {'number_of_characters': 74871, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 151.42, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ind_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ind_Latn': {'number_of_characters': 74871, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 151.42, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'isl_Latn-isl_Latn': {'number_of_characters': 70522, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 142.51, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'isl_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-isl_Latn': {'number_of_characters': 70522, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 142.51, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ita_Latn-ita_Latn': {'number_of_characters': 76124, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 153.99, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ita_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ita_Latn': {'number_of_characters': 76124, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 153.99, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jav_Latn-jav_Latn': {'number_of_characters': 71722, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 144.97, 'max_document_length': 174, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jav_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-jav_Latn': {'number_of_characters': 71722, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 144.97, 'max_document_length': 174, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jpn_Jpan-jpn_Jpan': {'number_of_characters': 33187, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 66.01, 'max_document_length': 76, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'jpn_Jpan-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-jpn_Jpan': {'number_of_characters': 33187, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 66.01, 'max_document_length': 76, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kac_Latn-kac_Latn': {'number_of_characters': 89655, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 181.72, 'max_document_length': 195, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kac_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kac_Latn': {'number_of_characters': 89655, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 181.72, 'max_document_length': 195, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kan_Knda-kan_Knda': {'number_of_characters': 65899, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.04, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kan_Knda-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kan_Knda': {'number_of_characters': 65899, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.04, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kat_Geor-kat_Geor': {'number_of_characters': 68309, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.98, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kat_Geor-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kat_Geor': {'number_of_characters': 68309, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.98, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kaz_Cyrl-kaz_Cyrl': {'number_of_characters': 64657, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.49, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kaz_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kaz_Cyrl': {'number_of_characters': 64657, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 130.49, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kea_Latn-kea_Latn': {'number_of_characters': 69323, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.06, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kea_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kea_Latn': {'number_of_characters': 69323, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.06, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khk_Cyrl-khk_Cyrl': {'number_of_characters': 66977, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 135.25, 'max_document_length': 162, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khk_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-khk_Cyrl': {'number_of_characters': 66977, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 135.25, 'max_document_length': 162, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khm_Khmr-khm_Khmr': {'number_of_characters': 69150, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 139.7, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'khm_Khmr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-khm_Khmr': {'number_of_characters': 69150, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 139.7, 'max_document_length': 169, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kin_Latn-kin_Latn': {'number_of_characters': 72803, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 147.19, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'kin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kin_Latn': {'number_of_characters': 72803, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 147.19, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'kir_Cyrl-kir_Cyrl': {'number_of_characters': 67957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 137.26, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kir_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kir_Cyrl': {'number_of_characters': 67957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 137.26, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kor_Hang-kor_Hang': {'number_of_characters': 32708, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 65.02, 'max_document_length': 88, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'kor_Hang-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-kor_Hang': {'number_of_characters': 32708, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 65.02, 'max_document_length': 88, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lao_Laoo-lao_Laoo': {'number_of_characters': 57958, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 116.77, 'max_document_length': 142, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lao_Laoo-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lao_Laoo': {'number_of_characters': 57958, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 116.77, 'max_document_length': 142, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lin_Latn-lin_Latn': {'number_of_characters': 74223, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 150.1, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lin_Latn': {'number_of_characters': 74223, 'num_samples': 1386, 'num_queries': 898, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 150.1, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 898, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lit_Latn-lit_Latn': {'number_of_characters': 62805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 126.7, 'max_document_length': 167, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lit_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lit_Latn': {'number_of_characters': 62805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 126.7, 'max_document_length': 167, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lug_Latn-lug_Latn': {'number_of_characters': 71566, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 144.65, 'max_document_length': 237, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'lug_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lug_Latn': {'number_of_characters': 71566, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 144.65, 'max_document_length': 237, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'luo_Latn-luo_Latn': {'number_of_characters': 66805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 134.9, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'luo_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-luo_Latn': {'number_of_characters': 66805, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 134.9, 'max_document_length': 178, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lvs_Latn-lvs_Latn': {'number_of_characters': 63957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 129.06, 'max_document_length': 172, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'lvs_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-lvs_Latn': {'number_of_characters': 63957, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 129.06, 'max_document_length': 172, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mal_Mlym-mal_Mlym': {'number_of_characters': 73599, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.82, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mal_Mlym-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mal_Mlym': {'number_of_characters': 73599, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.82, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mar_Deva-mar_Deva': {'number_of_characters': 62671, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 126.42, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'mar_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mar_Deva': {'number_of_characters': 62671, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 126.42, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'mkd_Cyrl-mkd_Cyrl': {'number_of_characters': 67588, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 136.5, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mkd_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mkd_Cyrl': {'number_of_characters': 67588, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 136.5, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mlt_Latn-mlt_Latn': {'number_of_characters': 68480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 138.33, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mlt_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mlt_Latn': {'number_of_characters': 68480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 138.33, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mri_Latn-mri_Latn': {'number_of_characters': 74519, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 150.7, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mri_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mri_Latn': {'number_of_characters': 74519, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 150.7, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mya_Mymr-mya_Mymr': {'number_of_characters': 81331, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 164.66, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'mya_Mymr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-mya_Mymr': {'number_of_characters': 81331, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 164.66, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nld_Latn-nld_Latn': {'number_of_characters': 68789, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 138.96, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nld_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nld_Latn': {'number_of_characters': 68789, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 138.96, 'max_document_length': 183, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nob_Latn-nob_Latn': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 131.03, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nob_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nob_Latn': {'number_of_characters': 64917, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 131.03, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nso_Latn-nso_Latn': {'number_of_characters': 79073, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 160.03, 'max_document_length': 235, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nso_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nso_Latn': {'number_of_characters': 79073, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 160.03, 'max_document_length': 235, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nya_Latn-nya_Latn': {'number_of_characters': 82685, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.44, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'nya_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-nya_Latn': {'number_of_characters': 82685, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.44, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ory_Orya-ory_Orya': {'number_of_characters': 66638, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 10, 'average_document_length': 134.55, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ory_Orya-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ory_Orya': {'number_of_characters': 66638, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 10, 'average_document_length': 134.55, 'max_document_length': 168, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pan_Guru-pan_Guru': {'number_of_characters': 66944, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.18, 'max_document_length': 157, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pan_Guru-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pan_Guru': {'number_of_characters': 66944, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 135.18, 'max_document_length': 157, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pbt_Arab-pbt_Arab': {'number_of_characters': 61880, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 124.8, 'max_document_length': 155, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pbt_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pbt_Arab': {'number_of_characters': 61880, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 124.8, 'max_document_length': 155, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pes_Arab-pes_Arab': {'number_of_characters': 59252, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 119.42, 'max_document_length': 152, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pes_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pes_Arab': {'number_of_characters': 59252, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 119.42, 'max_document_length': 152, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'plt_Latn-plt_Latn': {'number_of_characters': 86472, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 175.2, 'max_document_length': 222, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'plt_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-plt_Latn': {'number_of_characters': 86472, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 175.2, 'max_document_length': 222, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pol_Latn-pol_Latn': {'number_of_characters': 67664, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 136.66, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'pol_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-pol_Latn': {'number_of_characters': 67664, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 136.66, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'por_Latn-por_Latn': {'number_of_characters': 71281, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.07, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'por_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-por_Latn': {'number_of_characters': 71281, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.07, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ron_Latn-ron_Latn': {'number_of_characters': 71844, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 145.22, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ron_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ron_Latn': {'number_of_characters': 71844, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 145.22, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'rus_Cyrl-rus_Cyrl': {'number_of_characters': 75823, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 153.38, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'rus_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-rus_Cyrl': {'number_of_characters': 75823, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 153.38, 'max_document_length': 196, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'shn_Mymr-shn_Mymr': {'number_of_characters': 69288, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 139.98, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'shn_Mymr-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-shn_Mymr': {'number_of_characters': 69288, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 139.98, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slk_Latn-slk_Latn': {'number_of_characters': 62663, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 126.41, 'max_document_length': 146, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slk_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-slk_Latn': {'number_of_characters': 62663, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 126.41, 'max_document_length': 146, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slv_Latn-slv_Latn': {'number_of_characters': 62895, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.88, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'slv_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-slv_Latn': {'number_of_characters': 62895, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 126.88, 'max_document_length': 176, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sna_Latn-sna_Latn': {'number_of_characters': 74071, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.78, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sna_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sna_Latn': {'number_of_characters': 74071, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.78, 'max_document_length': 191, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'snd_Arab-snd_Arab': {'number_of_characters': 58057, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 116.97, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'snd_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-snd_Arab': {'number_of_characters': 58057, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 116.97, 'max_document_length': 164, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'som_Latn-som_Latn': {'number_of_characters': 82838, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.75, 'max_document_length': 201, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'som_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-som_Latn': {'number_of_characters': 82838, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 167.75, 'max_document_length': 201, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sot_Latn-sot_Latn': {'number_of_characters': 75794, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 153.32, 'max_document_length': 186, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sot_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sot_Latn': {'number_of_characters': 75794, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 153.32, 'max_document_length': 186, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'spa_Latn-spa_Latn': {'number_of_characters': 74920, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 151.52, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'spa_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-spa_Latn': {'number_of_characters': 74920, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 151.52, 'max_document_length': 180, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'srp_Cyrl-srp_Cyrl': {'number_of_characters': 61657, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.35, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'srp_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-srp_Cyrl': {'number_of_characters': 61657, 'num_samples': 1387, 'num_queries': 899, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 124.35, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.09, 'max_query_length': 2, 'unique_queries': 899, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 488}, 'ssw_Latn-ssw_Latn': {'number_of_characters': 73964, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 149.57, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ssw_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ssw_Latn': {'number_of_characters': 73964, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 149.57, 'max_document_length': 182, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sun_Latn-sun_Latn': {'number_of_characters': 71320, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 144.15, 'max_document_length': 173, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sun_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-sun_Latn': {'number_of_characters': 71320, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 144.15, 'max_document_length': 173, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swe_Latn-swe_Latn': {'number_of_characters': 62785, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 126.66, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swe_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-swe_Latn': {'number_of_characters': 62785, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 126.66, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swh_Latn-swh_Latn': {'number_of_characters': 73480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.57, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'swh_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-swh_Latn': {'number_of_characters': 73480, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 148.57, 'max_document_length': 194, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tam_Taml-tam_Taml': {'number_of_characters': 73991, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.62, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tam_Taml-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tam_Taml': {'number_of_characters': 73991, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 149.62, 'max_document_length': 181, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tel_Telu-tel_Telu': {'number_of_characters': 65945, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 133.13, 'max_document_length': 149, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tel_Telu-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tel_Telu': {'number_of_characters': 65945, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 133.13, 'max_document_length': 149, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgk_Cyrl-tgk_Cyrl': {'number_of_characters': 67829, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 136.99, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgk_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tgk_Cyrl': {'number_of_characters': 67829, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 136.99, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgl_Latn-tgl_Latn': {'number_of_characters': 75087, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 151.87, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tgl_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tgl_Latn': {'number_of_characters': 75087, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 151.87, 'max_document_length': 184, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tha_Thai-tha_Thai': {'number_of_characters': 54496, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 109.67, 'max_document_length': 123, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tha_Thai-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tha_Thai': {'number_of_characters': 54496, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 109.67, 'max_document_length': 123, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tir_Ethi-tir_Ethi': {'number_of_characters': 47775, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 95.9, 'max_document_length': 110, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tir_Ethi-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tir_Ethi': {'number_of_characters': 47775, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 95.9, 'max_document_length': 110, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tsn_Latn-tsn_Latn': {'number_of_characters': 79391, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 160.69, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tsn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tsn_Latn': {'number_of_characters': 79391, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 160.69, 'max_document_length': 204, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tso_Latn-tso_Latn': {'number_of_characters': 83501, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 169.11, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tso_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tso_Latn': {'number_of_characters': 83501, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 169.11, 'max_document_length': 215, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tur_Latn-tur_Latn': {'number_of_characters': 65382, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 131.98, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'tur_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-tur_Latn': {'number_of_characters': 65382, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 131.98, 'max_document_length': 158, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ukr_Cyrl-ukr_Cyrl': {'number_of_characters': 65850, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 132.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ukr_Cyrl-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-ukr_Cyrl': {'number_of_characters': 65850, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 132.94, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'uzn_Latn-uzn_Latn': {'number_of_characters': 70828, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 143.14, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'uzn_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-uzn_Latn': {'number_of_characters': 70828, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 143.14, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'vie_Latn-vie_Latn': {'number_of_characters': 66724, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 134.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'vie_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-vie_Latn': {'number_of_characters': 66724, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 134.73, 'max_document_length': 161, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'war_Latn-war_Latn': {'number_of_characters': 78444, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 158.75, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'war_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-war_Latn': {'number_of_characters': 78444, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 158.75, 'max_document_length': 207, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'wol_Latn-wol_Latn': {'number_of_characters': 64521, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 130.22, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'wol_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-wol_Latn': {'number_of_characters': 64521, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 130.22, 'max_document_length': 139, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'xho_Latn-xho_Latn': {'number_of_characters': 71629, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.78, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'xho_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-xho_Latn': {'number_of_characters': 71629, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 16, 'average_document_length': 144.78, 'max_document_length': 179, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'yor_Latn-yor_Latn': {'number_of_characters': 62752, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 126.59, 'max_document_length': 143, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'yor_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-yor_Latn': {'number_of_characters': 62752, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 126.59, 'max_document_length': 143, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hans-zho_Hans': {'number_of_characters': 20549, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 40.11, 'max_document_length': 64, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hans-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zho_Hans': {'number_of_characters': 20549, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 40.11, 'max_document_length': 64, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hant-zho_Hant': {'number_of_characters': 19947, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 38.88, 'max_document_length': 45, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zho_Hant-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zho_Hant': {'number_of_characters': 19947, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 7, 'average_document_length': 38.88, 'max_document_length': 45, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zsm_Latn-zsm_Latn': {'number_of_characters': 72008, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 145.56, 'max_document_length': 210, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zsm_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zsm_Latn': {'number_of_characters': 72008, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 13, 'average_document_length': 145.56, 'max_document_length': 210, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zul_Latn-zul_Latn': {'number_of_characters': 69413, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.24, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'zul_Latn-eng_Latn': {'number_of_characters': 70589, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 142.65, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'eng_Latn-zul_Latn': {'number_of_characters': 69413, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 140.24, 'max_document_length': 171, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Arab-arb_Latn': {'number_of_characters': 61298, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 12, 'average_document_length': 123.61, 'max_document_length': 160, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'arb_Latn-arb_Arab': {'number_of_characters': 53671, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 107.98, 'max_document_length': 134, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Beng-ben_Latn': {'number_of_characters': 68285, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 137.93, 'max_document_length': 185, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'ben_Latn-ben_Beng': {'number_of_characters': 63512, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 9, 'average_document_length': 128.15, 'max_document_length': 175, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Deva-hin_Latn': {'number_of_characters': 68307, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 137.97, 'max_document_length': 170, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'hin_Latn-hin_Deva': {'number_of_characters': 66332, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 14, 'average_document_length': 133.93, 'max_document_length': 165, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Deva-npi_Latn': {'number_of_characters': 65683, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 20, 'average_document_length': 132.6, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'npi_Latn-npi_Deva': {'number_of_characters': 61183, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 18, 'average_document_length': 123.38, 'max_document_length': 154, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Sinh-sin_Latn': {'number_of_characters': 85996, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 19, 'average_document_length': 174.22, 'max_document_length': 224, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'sin_Latn-sin_Sinh': {'number_of_characters': 63902, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 17, 'average_document_length': 128.95, 'max_document_length': 159, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Arab-urd_Latn': {'number_of_characters': 82039, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 15, 'average_document_length': 166.11, 'max_document_length': 230, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}, 'urd_Latn-urd_Arab': {'number_of_characters': 64450, 'num_samples': 1388, 'num_queries': 900, 'num_documents': 488, 'min_document_length': 11, 'average_document_length': 130.07, 'max_document_length': 187, 'unique_documents': 488, 'min_query_length': 2, 'average_query_length': 1.08, 'max_query_length': 2, 'unique_queries': 900, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 488}}}}
BengaliDocumentClassification ['ben'] Classification s2s [News, Written] None None
BengaliHateSpeechClassification (Karim et al., 2020) ['ben'] Classification s2s [News, Written] None None
BengaliSentimentAnalysis (Sazzed et al., 2020) ['ben'] Classification s2s [Reviews, Written] None None
BeytooteClustering ['fas'] Clustering p2p [News] None None
BibleNLPBitextMining (Akerman et al., 2023) ['aai', 'aak', 'aau', 'aaz', 'abt', 'abx', 'aby', 'acf', 'acr', 'acu', 'adz', 'aer', 'aey', 'agd', 'agg', 'agm', 'agn', 'agr', 'agt', 'agu', 'aia', 'aii', 'aka', 'ake', 'alp', 'alq', 'als', 'aly', 'ame', 'amf', 'amk', 'amm', 'amn', 'amo', 'amp', 'amr', 'amu', 'amx', 'anh', 'anv', 'aoi', 'aoj', 'aom', 'aon', 'apb', 'ape', 'apn', 'apr', 'apu', 'apw', 'apz', 'arb', 'are', 'arl', 'arn', 'arp', 'asm', 'aso', 'ata', 'atb', 'atd', 'atg', 'att', 'auc', 'aui', 'auy', 'avt', 'awb', 'awk', 'awx', 'azb', 'azg', 'azz', 'bao', 'bba', 'bbb', 'bbr', 'bch', 'bco', 'bdd', 'bea', 'bef', 'bel', 'ben', 'beo', 'beu', 'bgs', 'bgt', 'bhg', 'bhl', 'big', 'bjk', 'bjp', 'bjr', 'bjv', 'bjz', 'bkd', 'bki', 'bkq', 'bkx', 'blw', 'blz', 'bmh', 'bmk', 'bmr', 'bmu', 'bnp', 'boa', 'boj', 'bon', 'box', 'bpr', 'bps', 'bqc', 'bqp', 'bre', 'bsj', 'bsn', 'bsp', 'bss', 'buk', 'bus', 'bvd', 'bvr', 'bxh', 'byr', 'byx', 'bzd', 'bzh', 'bzj', 'caa', 'cab', 'cac', 'caf', 'cak', 'cao', 'cap', 'car', 'cav', 'cax', 'cbc', 'cbi', 'cbk', 'cbr', 'cbs', 'cbt', 'cbu', 'cbv', 'cco', 'ceb', 'cek', 'ces', 'cgc', 'cha', 'chd', 'chf', 'chk', 'chq', 'chz', 'cjo', 'cjv', 'ckb', 'cle', 'clu', 'cme', 'cmn', 'cni', 'cnl', 'cnt', 'cof', 'con', 'cop', 'cot', 'cpa', 'cpb', 'cpc', 'cpu', 'cpy', 'crn', 'crx', 'cso', 'csy', 'cta', 'cth', 'ctp', 'ctu', 'cub', 'cuc', 'cui', 'cuk', 'cut', 'cux', 'cwe', 'cya', 'daa', 'dad', 'dah', 'dan', 'ded', 'deu', 'dgc', 'dgr', 'dgz', 'dhg', 'dif', 'dik', 'dji', 'djk', 'djr', 'dob', 'dop', 'dov', 'dwr', 'dww', 'dwy', 'ebk', 'eko', 'emi', 'emp', 'eng', 'enq', 'epo', 'eri', 'ese', 'esk', 'etr', 'ewe', 'faa', 'fai', 'far', 'ffm', 'for', 'fra', 'fue', 'fuf', 'fuh', 'gah', 'gai', 'gam', 'gaw', 'gdn', 'gdr', 'geb', 'gfk', 'ghs', 'glk', 'gmv', 'gng', 'gnn', 'gnw', 'gof', 'grc', 'gub', 'guh', 'gui', 'guj', 'gul', 'gum', 'gun', 'guo', 'gup', 'gux', 'gvc', 'gvf', 'gvn', 'gvs', 'gwi', 'gym', 'gyr', 'hat', 'hau', 'haw', 'hbo', 'hch', 'heb', 'heg', 'hin', 'hix', 'hla', 'hlt', 'hmo', 'hns', 'hop', 'hot', 'hrv', 'hto', 'hub', 'hui', 'hun', 'hus', 'huu', 'huv', 'hvn', 'ian', 'ign', 'ikk', 'ikw', 'ilo', 'imo', 'inb', 'ind', 'ino', 'iou', 'ipi', 'isn', 'ita', 'iws', 'ixl', 'jac', 'jae', 'jao', 'jic', 'jid', 'jiv', 'jni', 'jpn', 'jvn', 'kan', 'kaq', 'kbc', 'kbh', 'kbm', 'kbq', 'kdc', 'kde', 'kdl', 'kek', 'ken', 'kew', 'kgf', 'kgk', 'kgp', 'khs', 'khz', 'kik', 'kiw', 'kiz', 'kje', 'kjs', 'kkc', 'kkl', 'klt', 'klv', 'kmg', 'kmh', 'kmk', 'kmo', 'kms', 'kmu', 'kne', 'knf', 'knj', 'knv', 'kos', 'kpf', 'kpg', 'kpj', 'kpr', 'kpw', 'kpx', 'kqa', 'kqc', 'kqf', 'kql', 'kqw', 'ksd', 'ksj', 'ksr', 'ktm', 'kto', 'kud', 'kue', 'kup', 'kvg', 'kvn', 'kwd', 'kwf', 'kwi', 'kwj', 'kyc', 'kyf', 'kyg', 'kyq', 'kyz', 'kze', 'lac', 'lat', 'lbb', 'lbk', 'lcm', 'leu', 'lex', 'lgl', 'lid', 'lif', 'lin', 'lit', 'llg', 'lug', 'luo', 'lww', 'maa', 'maj', 'mal', 'mam', 'maq', 'mar', 'mau', 'mav', 'maz', 'mbb', 'mbc', 'mbh', 'mbj', 'mbl', 'mbs', 'mbt', 'mca', 'mcb', 'mcd', 'mcf', 'mco', 'mcp', 'mcq', 'mcr', 'mdy', 'med', 'mee', 'mek', 'meq', 'met', 'meu', 'mgc', 'mgh', 'mgw', 'mhl', 'mib', 'mic', 'mie', 'mig', 'mih', 'mil', 'mio', 'mir', 'mit', 'miz', 'mjc', 'mkj', 'mkl', 'mkn', 'mks', 'mle', 'mlh', 'mlp', 'mmo', 'mmx', 'mna', 'mop', 'mox', 'mph', 'mpj', 'mpm', 'mpp', 'mps', 'mpt', 'mpx', 'mqb', 'mqj', 'msb', 'msc', 'msk', 'msm', 'msy', 'mti', 'mto', 'mux', 'muy', 'mva', 'mvn', 'mwc', 'mwe', 'mwf', 'mwp', 'mxb', 'mxp', 'mxq', 'mxt', 'mya', 'myk', 'myu', 'myw', 'myy', 'mzz', 'nab', 'naf', 'nak', 'nas', 'nbq', 'nca', 'nch', 'ncj', 'ncl', 'ncu', 'ndg', 'ndj', 'nfa', 'ngp', 'ngu', 'nhe', 'nhg', 'nhi', 'nho', 'nhr', 'nhu', 'nhw', 'nhy', 'nif', 'nii', 'nin', 'nko', 'nld', 'nlg', 'nna', 'nnq', 'noa', 'nop', 'not', 'nou', 'npi', 'npl', 'nsn', 'nss', 'ntj', 'ntp', 'ntu', 'nuy', 'nvm', 'nwi', 'nya', 'nys', 'nyu', 'obo', 'okv', 'omw', 'ong', 'ons', 'ood', 'opm', 'ory', 'ote', 'otm', 'otn', 'otq', 'ots', 'pab', 'pad', 'pah', 'pan', 'pao', 'pes', 'pib', 'pio', 'pir', 'piu', 'pjt', 'pls', 'plu', 'pma', 'poe', 'poh', 'poi', 'pol', 'pon', 'por', 'poy', 'ppo', 'prf', 'pri', 'ptp', 'ptu', 'pwg', 'qub', 'quc', 'quf', 'quh', 'qul', 'qup', 'qvc', 'qve', 'qvh', 'qvm', 'qvn', 'qvs', 'qvw', 'qvz', 'qwh', 'qxh', 'qxn', 'qxo', 'rai', 'reg', 'rgu', 'rkb', 'rmc', 'rmy', 'ron', 'roo', 'rop', 'row', 'rro', 'ruf', 'rug', 'rus', 'rwo', 'sab', 'san', 'sbe', 'sbk', 'sbs', 'seh', 'sey', 'sgb', 'sgz', 'shj', 'shp', 'sim', 'sja', 'sll', 'smk', 'snc', 'snn', 'snp', 'snx', 'sny', 'som', 'soq', 'soy', 'spa', 'spl', 'spm', 'spp', 'sps', 'spy', 'sri', 'srm', 'srn', 'srp', 'srq', 'ssd', 'ssg', 'ssx', 'stp', 'sua', 'sue', 'sus', 'suz', 'swe', 'swh', 'swp', 'sxb', 'tac', 'taj', 'tam', 'tav', 'taw', 'tbc', 'tbf', 'tbg', 'tbo', 'tbz', 'tca', 'tcs', 'tcz', 'tdt', 'tee', 'tel', 'ter', 'tet', 'tew', 'tfr', 'tgk', 'tgl', 'tgo', 'tgp', 'tha', 'tif', 'tim', 'tiw', 'tiy', 'tke', 'tku', 'tlf', 'tmd', 'tna', 'tnc', 'tnk', 'tnn', 'tnp', 'toc', 'tod', 'tof', 'toj', 'ton', 'too', 'top', 'tos', 'tpa', 'tpi', 'tpt', 'tpz', 'trc', 'tsw', 'ttc', 'tte', 'tuc', 'tue', 'tuf', 'tuo', 'tur', 'tvk', 'twi', 'txq', 'txu', 'tzj', 'tzo', 'ubr', 'ubu', 'udu', 'uig', 'ukr', 'uli', 'ulk', 'upv', 'ura', 'urb', 'urd', 'uri', 'urt', 'urw', 'usa', 'usp', 'uvh', 'uvl', 'vid', 'vie', 'viv', 'vmy', 'waj', 'wal', 'wap', 'wat', 'wbi', 'wbp', 'wed', 'wer', 'wim', 'wiu', 'wiv', 'wmt', 'wmw', 'wnc', 'wnu', 'wol', 'wos', 'wrk', 'wro', 'wrs', 'wsk', 'wuv', 'xav', 'xbi', 'xed', 'xla', 'xnn', 'xon', 'xsi', 'xtd', 'xtm', 'yaa', 'yad', 'yal', 'yap', 'yaq', 'yby', 'ycn', 'yka', 'yle', 'yml', 'yon', 'yor', 'yrb', 'yre', 'yss', 'yuj', 'yut', 'yuw', 'yva', 'zaa', 'zab', 'zac', 'zad', 'zai', 'zaj', 'zam', 'zao', 'zap', 'zar', 'zas', 'zat', 'zav', 'zaw', 'zca', 'zga', 'zia', 'ziw', 'zlm', 'zos', 'zpc', 'zpl', 'zpm', 'zpo', 'zpq', 'zpu', 'zpv', 'zpz', 'zsr', 'ztq', 'zty', 'zyp'] BitextMining s2s [Religious, Written] None None
BigPatentClustering.v2 (Eva Sharma and Chen Li and Lu Wang, 2019) ['eng'] Clustering p2p [Legal, Written] None None
BiorxivClusteringP2P.v2 ['eng'] Clustering p2p [Academic, Written] None None
BiorxivClusteringS2S.v2 ['eng'] Clustering s2s [Academic, Written] None None
Birdsnap (Berg et al., 2014) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 1851} {'test': {'num_samples': 1851, 'unique_num_labels': 490, 'min_image_width': 267, 'average_image_width': 2081.56, 'max_image_width': 6400, 'min_image_height': 200, 'average_image_height': 1609.19, 'max_image_height': 5400, 'labels': {'0': {'count': 4}, '1': {'count': 5}, '2': {'count': 4}, '3': {'count': 4}, '4': {'count': 4}, '5': {'count': 2}, '6': {'count': 3}, '7': {'count': 5}, '8': {'count': 4}, '9': {'count': 5}, '11': {'count': 3}, '12': {'count': 4}, '13': {'count': 5}, '14': {'count': 4}, '15': {'count': 5}, '16': {'count': 4}, '17': {'count': 3}, '18': {'count': 2}, '19': {'count': 5}, '20': {'count': 4}, '21': {'count': 4}, '22': {'count': 5}, '23': {'count': 2}, '24': {'count': 4}, '25': {'count': 3}, '26': {'count': 4}, '27': {'count': 4}, '28': {'count': 2}, '29': {'count': 5}, '30': {'count': 3}, '31': {'count': 3}, '32': {'count': 3}, '33': {'count': 4}, '34': {'count': 4}, '35': {'count': 4}, '36': {'count': 3}, '37': {'count': 3}, '38': {'count': 4}, '39': {'count': 3}, '40': {'count': 4}, '41': {'count': 3}, '42': {'count': 3}, '43': {'count': 4}, '44': {'count': 2}, '45': {'count': 3}, '47': {'count': 5}, '48': {'count': 2}, '49': {'count': 5}, '50': {'count': 4}, '51': {'count': 5}, '52': {'count': 3}, '53': {'count': 3}, '54': {'count': 4}, '55': {'count': 2}, '56': {'count': 2}, '57': {'count': 5}, '58': {'count': 2}, '59': {'count': 1}, '60': {'count': 1}, '61': {'count': 3}, '62': {'count': 3}, '63': {'count': 5}, '64': {'count': 5}, '65': {'count': 4}, '67': {'count': 2}, '68': {'count': 3}, '69': {'count': 4}, '70': {'count': 5}, '71': {'count': 5}, '72': {'count': 5}, '73': {'count': 4}, '74': {'count': 5}, '75': {'count': 4}, '76': {'count': 4}, '80': {'count': 3}, '81': {'count': 5}, '82': {'count': 3}, '83': {'count': 5}, '84': {'count': 3}, '85': {'count': 4}, '86': {'count': 4}, '87': {'count': 5}, '88': {'count': 4}, '89': {'count': 5}, '90': {'count': 4}, '91': {'count': 4}, '92': {'count': 5}, '93': {'count': 4}, '94': {'count': 4}, '95': {'count': 5}, '96': {'count': 5}, '97': {'count': 5}, '98': {'count': 3}, '99': {'count': 5}, '100': {'count': 4}, '101': {'count': 5}, '102': {'count': 4}, '103': {'count': 3}, '105': {'count': 4}, '108': {'count': 4}, '109': {'count': 5}, '110': {'count': 3}, '111': {'count': 3}, '112': {'count': 4}, '113': {'count': 4}, '114': {'count': 5}, '115': {'count': 4}, '116': {'count': 5}, '117': {'count': 4}, '118': {'count': 4}, '119': {'count': 5}, '120': {'count': 5}, '121': {'count': 4}, '122': {'count': 3}, '124': {'count': 3}, '125': {'count': 4}, '126': {'count': 2}, '127': {'count': 3}, '128': {'count': 5}, '129': {'count': 5}, '130': {'count': 5}, '131': {'count': 3}, '132': {'count': 4}, '133': {'count': 4}, '134': {'count': 2}, '135': {'count': 5}, '136': {'count': 5}, '137': {'count': 3}, '138': {'count': 4}, '139': {'count': 3}, '140': {'count': 3}, '141': {'count': 2}, '142': {'count': 3}, '143': {'count': 5}, '144': {'count': 4}, '145': {'count': 5}, '146': {'count': 5}, '147': {'count': 5}, '148': {'count': 4}, '149': {'count': 4}, '150': {'count': 5}, '151': {'count': 5}, '152': {'count': 5}, '153': {'count': 3}, '154': {'count': 4}, '155': {'count': 3}, '156': {'count': 3}, '157': {'count': 3}, '159': {'count': 3}, '160': {'count': 4}, '161': {'count': 4}, '162': {'count': 4}, '163': {'count': 4}, '164': {'count': 3}, '165': {'count': 3}, '166': {'count': 3}, '167': {'count': 4}, '168': {'count': 4}, '169': {'count': 4}, '170': {'count': 4}, '171': {'count': 5}, '172': {'count': 4}, '173': {'count': 4}, '174': {'count': 5}, '175': {'count': 4}, '176': {'count': 2}, '177': {'count': 5}, '178': {'count': 5}, '179': {'count': 5}, '180': {'count': 5}, '181': {'count': 4}, '183': {'count': 2}, '184': {'count': 3}, '185': {'count': 2}, '186': {'count': 5}, '187': {'count': 2}, '188': {'count': 3}, '189': {'count': 2}, '190': {'count': 5}, '191': {'count': 4}, '192': {'count': 3}, '193': {'count': 3}, '194': {'count': 4}, '195': {'count': 3}, '196': {'count': 4}, '197': {'count': 3}, '198': {'count': 4}, '199': {'count': 5}, '200': {'count': 5}, '201': {'count': 1}, '204': {'count': 4}, '205': {'count': 5}, '206': {'count': 4}, '207': {'count': 3}, '208': {'count': 4}, '209': {'count': 4}, '210': {'count': 4}, '211': {'count': 4}, '212': {'count': 5}, '213': {'count': 4}, '214': {'count': 5}, '215': {'count': 3}, '216': {'count': 1}, '217': {'count': 5}, '218': {'count': 2}, '219': {'count': 5}, '220': {'count': 4}, '221': {'count': 5}, '222': {'count': 5}, '223': {'count': 3}, '224': {'count': 4}, '225': {'count': 5}, '226': {'count': 3}, '227': {'count': 4}, '228': {'count': 3}, '229': {'count': 4}, '230': {'count': 4}, '231': {'count': 5}, '232': {'count': 5}, '233': {'count': 5}, '234': {'count': 4}, '235': {'count': 4}, '236': {'count': 5}, '237': {'count': 5}, '238': {'count': 5}, '239': {'count': 4}, '240': {'count': 3}, '241': {'count': 3}, '242': {'count': 4}, '243': {'count': 5}, '244': {'count': 2}, '245': {'count': 4}, '246': {'count': 5}, '247': {'count': 3}, '248': {'count': 3}, '249': {'count': 5}, '250': {'count': 5}, '251': {'count': 4}, '252': {'count': 2}, '253': {'count': 5}, '254': {'count': 5}, '255': {'count': 5}, '256': {'count': 4}, '257': {'count': 4}, '258': {'count': 4}, '259': {'count': 3}, '260': {'count': 5}, '261': {'count': 4}, '262': {'count': 4}, '264': {'count': 4}, '265': {'count': 3}, '266': {'count': 5}, '267': {'count': 5}, '268': {'count': 3}, '269': {'count': 2}, '270': {'count': 3}, '271': {'count': 4}, '272': {'count': 4}, '273': {'count': 5}, '274': {'count': 5}, '275': {'count': 5}, '276': {'count': 2}, '277': {'count': 3}, '278': {'count': 5}, '279': {'count': 5}, '280': {'count': 4}, '281': {'count': 5}, '282': {'count': 5}, '283': {'count': 3}, '284': {'count': 5}, '285': {'count': 3}, '286': {'count': 5}, '287': {'count': 5}, '288': {'count': 4}, '289': {'count': 4}, '290': {'count': 5}, '291': {'count': 3}, '292': {'count': 2}, '293': {'count': 1}, '294': {'count': 1}, '295': {'count': 2}, '296': {'count': 4}, '297': {'count': 5}, '298': {'count': 4}, '300': {'count': 3}, '301': {'count': 3}, '303': {'count': 4}, '304': {'count': 4}, '305': {'count': 4}, '306': {'count': 2}, '307': {'count': 5}, '308': {'count': 4}, '309': {'count': 2}, '310': {'count': 3}, '311': {'count': 3}, '312': {'count': 4}, '313': {'count': 3}, '314': {'count': 3}, '315': {'count': 3}, '316': {'count': 5}, '317': {'count': 4}, '318': {'count': 5}, '319': {'count': 4}, '320': {'count': 4}, '321': {'count': 3}, '322': {'count': 5}, '323': {'count': 4}, '324': {'count': 2}, '325': {'count': 1}, '326': {'count': 3}, '327': {'count': 4}, '328': {'count': 3}, '330': {'count': 4}, '331': {'count': 4}, '332': {'count': 2}, '333': {'count': 5}, '334': {'count': 5}, '335': {'count': 5}, '336': {'count': 4}, '337': {'count': 4}, '338': {'count': 5}, '339': {'count': 3}, '340': {'count': 5}, '341': {'count': 5}, '342': {'count': 5}, '343': {'count': 2}, '344': {'count': 2}, '345': {'count': 3}, '346': {'count': 3}, '347': {'count': 5}, '348': {'count': 3}, '349': {'count': 2}, '350': {'count': 4}, '352': {'count': 5}, '353': {'count': 3}, '354': {'count': 5}, '355': {'count': 5}, '356': {'count': 4}, '357': {'count': 3}, '358': {'count': 3}, '359': {'count': 4}, '360': {'count': 5}, '361': {'count': 5}, '362': {'count': 4}, '363': {'count': 3}, '364': {'count': 4}, '365': {'count': 1}, '366': {'count': 4}, '367': {'count': 3}, '368': {'count': 4}, '369': {'count': 3}, '370': {'count': 5}, '371': {'count': 3}, '372': {'count': 5}, '373': {'count': 4}, '374': {'count': 4}, '375': {'count': 3}, '376': {'count': 4}, '377': {'count': 4}, '378': {'count': 4}, '379': {'count': 4}, '380': {'count': 4}, '381': {'count': 4}, '382': {'count': 1}, '383': {'count': 4}, '384': {'count': 4}, '385': {'count': 4}, '386': {'count': 2}, '387': {'count': 4}, '388': {'count': 2}, '389': {'count': 5}, '390': {'count': 4}, '391': {'count': 5}, '392': {'count': 4}, '394': {'count': 4}, '395': {'count': 4}, '396': {'count': 4}, '397': {'count': 4}, '398': {'count': 5}, '399': {'count': 4}, '400': {'count': 5}, '401': {'count': 4}, '402': {'count': 4}, '404': {'count': 5}, '405': {'count': 5}, '406': {'count': 5}, '407': {'count': 4}, '408': {'count': 2}, '409': {'count': 4}, '410': {'count': 3}, '411': {'count': 5}, '412': {'count': 4}, '413': {'count': 3}, '414': {'count': 4}, '415': {'count': 4}, '416': {'count': 4}, '417': {'count': 5}, '418': {'count': 3}, '419': {'count': 5}, '421': {'count': 4}, '422': {'count': 3}, '423': {'count': 5}, '424': {'count': 5}, '425': {'count': 2}, '426': {'count': 5}, '427': {'count': 4}, '428': {'count': 5}, '429': {'count': 3}, '430': {'count': 2}, '431': {'count': 3}, '432': {'count': 5}, '433': {'count': 4}, '434': {'count': 3}, '435': {'count': 3}, '437': {'count': 3}, '438': {'count': 5}, '439': {'count': 2}, '440': {'count': 4}, '441': {'count': 4}, '442': {'count': 5}, '443': {'count': 2}, '444': {'count': 3}, '445': {'count': 3}, '446': {'count': 5}, '447': {'count': 3}, '448': {'count': 2}, '449': {'count': 1}, '450': {'count': 3}, '451': {'count': 3}, '452': {'count': 4}, '453': {'count': 2}, '454': {'count': 4}, '455': {'count': 4}, '456': {'count': 5}, '458': {'count': 4}, '459': {'count': 4}, '460': {'count': 5}, '461': {'count': 4}, '462': {'count': 4}, '463': {'count': 5}, '464': {'count': 5}, '466': {'count': 2}, '467': {'count': 4}, '468': {'count': 3}, '469': {'count': 5}, '470': {'count': 5}, '471': {'count': 2}, '472': {'count': 4}, '473': {'count': 3}, '474': {'count': 5}, '475': {'count': 5}, '476': {'count': 5}, '477': {'count': 4}, '478': {'count': 2}, '479': {'count': 4}, '480': {'count': 4}, '481': {'count': 5}, '482': {'count': 4}, '483': {'count': 3}, '484': {'count': 5}, '485': {'count': 5}, '486': {'count': 4}, '487': {'count': 3}, '488': {'count': 3}, '489': {'count': 1}, '490': {'count': 1}, '491': {'count': 2}, '492': {'count': 4}, '493': {'count': 4}, '494': {'count': 3}, '495': {'count': 4}, '496': {'count': 5}, '497': {'count': 5}, '498': {'count': 5}, '499': {'count': 4}, '79': {'count': 4}, '106': {'count': 4}, '107': {'count': 4}, '202': {'count': 1}, '203': {'count': 1}, '457': {'count': 3}, '77': {'count': 2}, '78': {'count': 4}, '182': {'count': 2}, '263': {'count': 4}, '104': {'count': 1}, '158': {'count': 5}, '329': {'count': 1}, '393': {'count': 2}, '420': {'count': 2}}}}
BirdsnapZeroShot (Berg et al., 2014) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 1851} {'test': {'num_samples': 1851, 'unique_num_labels': 490, 'min_image_width': 267, 'average_image_width': 2081.56, 'max_image_width': 6400, 'min_image_height': 200, 'average_image_height': 1609.19, 'max_image_height': 5400, 'min_label_text_length': 34, 'average_label_text_length': 45.84, 'max_label_text_length': 60, 'labels': {'0': {'count': 4}, '1': {'count': 5}, '2': {'count': 4}, '3': {'count': 4}, '4': {'count': 4}, '5': {'count': 2}, '6': {'count': 3}, '7': {'count': 5}, '8': {'count': 4}, '9': {'count': 5}, '11': {'count': 3}, '12': {'count': 4}, '13': {'count': 5}, '14': {'count': 4}, '15': {'count': 5}, '16': {'count': 4}, '17': {'count': 3}, '18': {'count': 2}, '19': {'count': 5}, '20': {'count': 4}, '21': {'count': 4}, '22': {'count': 5}, '23': {'count': 2}, '24': {'count': 4}, '25': {'count': 3}, '26': {'count': 4}, '27': {'count': 4}, '28': {'count': 2}, '29': {'count': 5}, '30': {'count': 3}, '31': {'count': 3}, '32': {'count': 3}, '33': {'count': 4}, '34': {'count': 4}, '35': {'count': 4}, '36': {'count': 3}, '37': {'count': 3}, '38': {'count': 4}, '39': {'count': 3}, '40': {'count': 4}, '41': {'count': 3}, '42': {'count': 3}, '43': {'count': 4}, '44': {'count': 2}, '45': {'count': 3}, '47': {'count': 5}, '48': {'count': 2}, '49': {'count': 5}, '50': {'count': 4}, '51': {'count': 5}, '52': {'count': 3}, '53': {'count': 3}, '54': {'count': 4}, '55': {'count': 2}, '56': {'count': 2}, '57': {'count': 5}, '58': {'count': 2}, '59': {'count': 1}, '60': {'count': 1}, '61': {'count': 3}, '62': {'count': 3}, '63': {'count': 5}, '64': {'count': 5}, '65': {'count': 4}, '67': {'count': 2}, '68': {'count': 3}, '69': {'count': 4}, '70': {'count': 5}, '71': {'count': 5}, '72': {'count': 5}, '73': {'count': 4}, '74': {'count': 5}, '75': {'count': 4}, '76': {'count': 4}, '80': {'count': 3}, '81': {'count': 5}, '82': {'count': 3}, '83': {'count': 5}, '84': {'count': 3}, '85': {'count': 4}, '86': {'count': 4}, '87': {'count': 5}, '88': {'count': 4}, '89': {'count': 5}, '90': {'count': 4}, '91': {'count': 4}, '92': {'count': 5}, '93': {'count': 4}, '94': {'count': 4}, '95': {'count': 5}, '96': {'count': 5}, '97': {'count': 5}, '98': {'count': 3}, '99': {'count': 5}, '100': {'count': 4}, '101': {'count': 5}, '102': {'count': 4}, '103': {'count': 3}, '105': {'count': 4}, '108': {'count': 4}, '109': {'count': 5}, '110': {'count': 3}, '111': {'count': 3}, '112': {'count': 4}, '113': {'count': 4}, '114': {'count': 5}, '115': {'count': 4}, '116': {'count': 5}, '117': {'count': 4}, '118': {'count': 4}, '119': {'count': 5}, '120': {'count': 5}, '121': {'count': 4}, '122': {'count': 3}, '124': {'count': 3}, '125': {'count': 4}, '126': {'count': 2}, '127': {'count': 3}, '128': {'count': 5}, '129': {'count': 5}, '130': {'count': 5}, '131': {'count': 3}, '132': {'count': 4}, '133': {'count': 4}, '134': {'count': 2}, '135': {'count': 5}, '136': {'count': 5}, '137': {'count': 3}, '138': {'count': 4}, '139': {'count': 3}, '140': {'count': 3}, '141': {'count': 2}, '142': {'count': 3}, '143': {'count': 5}, '144': {'count': 4}, '145': {'count': 5}, '146': {'count': 5}, '147': {'count': 5}, '148': {'count': 4}, '149': {'count': 4}, '150': {'count': 5}, '151': {'count': 5}, '152': {'count': 5}, '153': {'count': 3}, '154': {'count': 4}, '155': {'count': 3}, '156': {'count': 3}, '157': {'count': 3}, '159': {'count': 3}, '160': {'count': 4}, '161': {'count': 4}, '162': {'count': 4}, '163': {'count': 4}, '164': {'count': 3}, '165': {'count': 3}, '166': {'count': 3}, '167': {'count': 4}, '168': {'count': 4}, '169': {'count': 4}, '170': {'count': 4}, '171': {'count': 5}, '172': {'count': 4}, '173': {'count': 4}, '174': {'count': 5}, '175': {'count': 4}, '176': {'count': 2}, '177': {'count': 5}, '178': {'count': 5}, '179': {'count': 5}, '180': {'count': 5}, '181': {'count': 4}, '183': {'count': 2}, '184': {'count': 3}, '185': {'count': 2}, '186': {'count': 5}, '187': {'count': 2}, '188': {'count': 3}, '189': {'count': 2}, '190': {'count': 5}, '191': {'count': 4}, '192': {'count': 3}, '193': {'count': 3}, '194': {'count': 4}, '195': {'count': 3}, '196': {'count': 4}, '197': {'count': 3}, '198': {'count': 4}, '199': {'count': 5}, '200': {'count': 5}, '201': {'count': 1}, '204': {'count': 4}, '205': {'count': 5}, '206': {'count': 4}, '207': {'count': 3}, '208': {'count': 4}, '209': {'count': 4}, '210': {'count': 4}, '211': {'count': 4}, '212': {'count': 5}, '213': {'count': 4}, '214': {'count': 5}, '215': {'count': 3}, '216': {'count': 1}, '217': {'count': 5}, '218': {'count': 2}, '219': {'count': 5}, '220': {'count': 4}, '221': {'count': 5}, '222': {'count': 5}, '223': {'count': 3}, '224': {'count': 4}, '225': {'count': 5}, '226': {'count': 3}, '227': {'count': 4}, '228': {'count': 3}, '229': {'count': 4}, '230': {'count': 4}, '231': {'count': 5}, '232': {'count': 5}, '233': {'count': 5}, '234': {'count': 4}, '235': {'count': 4}, '236': {'count': 5}, '237': {'count': 5}, '238': {'count': 5}, '239': {'count': 4}, '240': {'count': 3}, '241': {'count': 3}, '242': {'count': 4}, '243': {'count': 5}, '244': {'count': 2}, '245': {'count': 4}, '246': {'count': 5}, '247': {'count': 3}, '248': {'count': 3}, '249': {'count': 5}, '250': {'count': 5}, '251': {'count': 4}, '252': {'count': 2}, '253': {'count': 5}, '254': {'count': 5}, '255': {'count': 5}, '256': {'count': 4}, '257': {'count': 4}, '258': {'count': 4}, '259': {'count': 3}, '260': {'count': 5}, '261': {'count': 4}, '262': {'count': 4}, '264': {'count': 4}, '265': {'count': 3}, '266': {'count': 5}, '267': {'count': 5}, '268': {'count': 3}, '269': {'count': 2}, '270': {'count': 3}, '271': {'count': 4}, '272': {'count': 4}, '273': {'count': 5}, '274': {'count': 5}, '275': {'count': 5}, '276': {'count': 2}, '277': {'count': 3}, '278': {'count': 5}, '279': {'count': 5}, '280': {'count': 4}, '281': {'count': 5}, '282': {'count': 5}, '283': {'count': 3}, '284': {'count': 5}, '285': {'count': 3}, '286': {'count': 5}, '287': {'count': 5}, '288': {'count': 4}, '289': {'count': 4}, '290': {'count': 5}, '291': {'count': 3}, '292': {'count': 2}, '293': {'count': 1}, '294': {'count': 1}, '295': {'count': 2}, '296': {'count': 4}, '297': {'count': 5}, '298': {'count': 4}, '300': {'count': 3}, '301': {'count': 3}, '303': {'count': 4}, '304': {'count': 4}, '305': {'count': 4}, '306': {'count': 2}, '307': {'count': 5}, '308': {'count': 4}, '309': {'count': 2}, '310': {'count': 3}, '311': {'count': 3}, '312': {'count': 4}, '313': {'count': 3}, '314': {'count': 3}, '315': {'count': 3}, '316': {'count': 5}, '317': {'count': 4}, '318': {'count': 5}, '319': {'count': 4}, '320': {'count': 4}, '321': {'count': 3}, '322': {'count': 5}, '323': {'count': 4}, '324': {'count': 2}, '325': {'count': 1}, '326': {'count': 3}, '327': {'count': 4}, '328': {'count': 3}, '330': {'count': 4}, '331': {'count': 4}, '332': {'count': 2}, '333': {'count': 5}, '334': {'count': 5}, '335': {'count': 5}, '336': {'count': 4}, '337': {'count': 4}, '338': {'count': 5}, '339': {'count': 3}, '340': {'count': 5}, '341': {'count': 5}, '342': {'count': 5}, '343': {'count': 2}, '344': {'count': 2}, '345': {'count': 3}, '346': {'count': 3}, '347': {'count': 5}, '348': {'count': 3}, '349': {'count': 2}, '350': {'count': 4}, '352': {'count': 5}, '353': {'count': 3}, '354': {'count': 5}, '355': {'count': 5}, '356': {'count': 4}, '357': {'count': 3}, '358': {'count': 3}, '359': {'count': 4}, '360': {'count': 5}, '361': {'count': 5}, '362': {'count': 4}, '363': {'count': 3}, '364': {'count': 4}, '365': {'count': 1}, '366': {'count': 4}, '367': {'count': 3}, '368': {'count': 4}, '369': {'count': 3}, '370': {'count': 5}, '371': {'count': 3}, '372': {'count': 5}, '373': {'count': 4}, '374': {'count': 4}, '375': {'count': 3}, '376': {'count': 4}, '377': {'count': 4}, '378': {'count': 4}, '379': {'count': 4}, '380': {'count': 4}, '381': {'count': 4}, '382': {'count': 1}, '383': {'count': 4}, '384': {'count': 4}, '385': {'count': 4}, '386': {'count': 2}, '387': {'count': 4}, '388': {'count': 2}, '389': {'count': 5}, '390': {'count': 4}, '391': {'count': 5}, '392': {'count': 4}, '394': {'count': 4}, '395': {'count': 4}, '396': {'count': 4}, '397': {'count': 4}, '398': {'count': 5}, '399': {'count': 4}, '400': {'count': 5}, '401': {'count': 4}, '402': {'count': 4}, '404': {'count': 5}, '405': {'count': 5}, '406': {'count': 5}, '407': {'count': 4}, '408': {'count': 2}, '409': {'count': 4}, '410': {'count': 3}, '411': {'count': 5}, '412': {'count': 4}, '413': {'count': 3}, '414': {'count': 4}, '415': {'count': 4}, '416': {'count': 4}, '417': {'count': 5}, '418': {'count': 3}, '419': {'count': 5}, '421': {'count': 4}, '422': {'count': 3}, '423': {'count': 5}, '424': {'count': 5}, '425': {'count': 2}, '426': {'count': 5}, '427': {'count': 4}, '428': {'count': 5}, '429': {'count': 3}, '430': {'count': 2}, '431': {'count': 3}, '432': {'count': 5}, '433': {'count': 4}, '434': {'count': 3}, '435': {'count': 3}, '437': {'count': 3}, '438': {'count': 5}, '439': {'count': 2}, '440': {'count': 4}, '441': {'count': 4}, '442': {'count': 5}, '443': {'count': 2}, '444': {'count': 3}, '445': {'count': 3}, '446': {'count': 5}, '447': {'count': 3}, '448': {'count': 2}, '449': {'count': 1}, '450': {'count': 3}, '451': {'count': 3}, '452': {'count': 4}, '453': {'count': 2}, '454': {'count': 4}, '455': {'count': 4}, '456': {'count': 5}, '458': {'count': 4}, '459': {'count': 4}, '460': {'count': 5}, '461': {'count': 4}, '462': {'count': 4}, '463': {'count': 5}, '464': {'count': 5}, '466': {'count': 2}, '467': {'count': 4}, '468': {'count': 3}, '469': {'count': 5}, '470': {'count': 5}, '471': {'count': 2}, '472': {'count': 4}, '473': {'count': 3}, '474': {'count': 5}, '475': {'count': 5}, '476': {'count': 5}, '477': {'count': 4}, '478': {'count': 2}, '479': {'count': 4}, '480': {'count': 4}, '481': {'count': 5}, '482': {'count': 4}, '483': {'count': 3}, '484': {'count': 5}, '485': {'count': 5}, '486': {'count': 4}, '487': {'count': 3}, '488': {'count': 3}, '489': {'count': 1}, '490': {'count': 1}, '491': {'count': 2}, '492': {'count': 4}, '493': {'count': 4}, '494': {'count': 3}, '495': {'count': 4}, '496': {'count': 5}, '497': {'count': 5}, '498': {'count': 5}, '499': {'count': 4}, '79': {'count': 4}, '106': {'count': 4}, '107': {'count': 4}, '202': {'count': 1}, '203': {'count': 1}, '457': {'count': 3}, '77': {'count': 2}, '78': {'count': 4}, '182': {'count': 2}, '263': {'count': 4}, '104': {'count': 1}, '158': {'count': 5}, '329': {'count': 1}, '393': {'count': 2}, '420': {'count': 2}}}}
BlurbsClusteringP2P.v2 (Steffen Remus, 2019) ['deu'] Clustering p2p [Fiction, Written] None None
BlurbsClusteringS2S.v2 (Steffen Remus, 2019) ['deu'] Clustering s2s [Fiction, Written] None None
BornholmBitextMining ['dan'] BitextMining s2s [Fiction, Social, Web, Written] {'test': 500} {'test': {'num_samples': 500, 'number_of_characters': 44361, 'unique_pairs': 500, 'min_sentence1_length': 1, 'average_sentence1_length': 49.83, 'max_sentence1_length': 555, 'unique_sentence1': 497, 'min_sentence2_length': 5, 'average_sentence2_length': 38.89, 'max_sentence2_length': 453, 'unique_sentence2': 491}}
BrazilianToxicTweetsClassification (Joao Augusto Leite and Diego F. Silva and Kalina Bontcheva and Carolina Scarton, 2020) ['por'] MultilabelClassification s2s [Constructed, Written] None None
BrightLongRetrieval (Hongjin Su, 2024) ['eng'] Retrieval s2p [Non-fiction, Written] None None
BrightRetrieval (Hongjin Su, 2024) ['eng'] Retrieval s2p [Non-fiction, Written] None None
BuiltBenchClusteringP2P (Shahinmoghadam et al., 2024) ['eng'] Clustering p2p [Engineering, Written] None None
BuiltBenchClusteringS2S (Shahinmoghadam et al., 2024) ['eng'] Clustering s2s [Engineering, Written] None None
BuiltBenchReranking (Shahinmoghadam et al., 2024) ['eng'] Reranking p2p [Engineering, Written] {'test': 179} {'test': {'num_samples': 179, 'number_of_characters': 1572224, 'num_positive': 1253, 'num_negative': 3759, 'min_query_length': 35, 'avg_query_length': 140.44, 'max_query_length': 307, 'unique_query': 82, 'min_positive_length': 190, 'avg_positive_length': 311.32, 'max_positive_length': 477, 'unique_positive': 1253, 'min_negative_length': 166, 'avg_negative_length': 307.79, 'max_negative_length': 508, 'unique_negative': 2241}}
BuiltBenchRetrieval (Shahinmoghadam et al., 2024) ['eng'] Retrieval p2p [Engineering, Written] None None
BulgarianStoreReviewSentimentClassfication (Georgieva-Trifonova et al., 2018) ['bul'] Classification s2s [Reviews, Written] None None
CBD ['pol'] Classification s2s [Social, Written] None None
CDSC-E ['pol'] PairClassification s2s [Written] None None
CDSC-R ['pol'] STS s2s [Web, Written] None None
CEDRClassification (Sboev et al., 2021) ['rus'] MultilabelClassification s2s [Blog, Social, Web, Written] {'test': 1882, 'train': 7528} {'test': {'num_samples': 1882, 'number_of_characters': 171649, 'number_texts_in_train': 7, 'min_text_length': 6, 'average_text_length': 91.21, 'max_text_length': 220, 'unique_texts': 1875, 'min_labels_per_text': 0, 'average_label_per_text': 0.62, 'max_labels_per_text': 2, 'unique_labels': 6, 'labels': {'None': {'count': 734}, '3': {'count': 141}, '2': {'count': 170}, '1': {'count': 379}, '0': {'count': 353}, '4': {'count': 125}}}, 'train': {'num_samples': 7528, 'number_of_characters': 697322, 'number_texts_in_train': None, 'min_text_length': 5, 'average_text_length': 92.63, 'max_text_length': 280, 'unique_texts': 7500, 'min_labels_per_text': 0, 'average_label_per_text': 0.61, 'max_labels_per_text': 3, 'unique_labels': 6, 'labels': {'None': {'count': 3043}, '2': {'count': 607}, '0': {'count': 1569}, '3': {'count': 589}, '1': {'count': 1417}, '4': {'count': 411}}}}
CExaPPC ['fas'] PairClassification s2s [Social, Web] None None
CIFAR10 (Alex Krizhevsky, 2009) ['eng'] ImageClassification i2i [Web] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 10, 'min_image_width': 32, 'average_image_width': 32.0, 'max_image_width': 32, 'min_image_height': 32, 'average_image_height': 32.0, 'max_image_height': 32, 'labels': {'3': {'count': 1000}, '8': {'count': 1000}, '0': {'count': 1000}, '6': {'count': 1000}, '1': {'count': 1000}, '9': {'count': 1000}, '5': {'count': 1000}, '7': {'count': 1000}, '4': {'count': 1000}, '2': {'count': 1000}}}}
CIFAR100 (Alex Krizhevsky, 2009) ['eng'] ImageClassification i2t [Web] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 100, 'min_image_width': 32, 'average_image_width': 32.0, 'max_image_width': 32, 'min_image_height': 32, 'average_image_height': 32.0, 'max_image_height': 32, 'labels': {'49': {'count': 100}, '33': {'count': 100}, '72': {'count': 100}, '51': {'count': 100}, '71': {'count': 100}, '92': {'count': 100}, '15': {'count': 100}, '14': {'count': 100}, '23': {'count': 100}, '0': {'count': 100}, '75': {'count': 100}, '81': {'count': 100}, '69': {'count': 100}, '40': {'count': 100}, '43': {'count': 100}, '97': {'count': 100}, '70': {'count': 100}, '53': {'count': 100}, '29': {'count': 100}, '21': {'count': 100}, '16': {'count': 100}, '39': {'count': 100}, '8': {'count': 100}, '20': {'count': 100}, '61': {'count': 100}, '41': {'count': 100}, '93': {'count': 100}, '56': {'count': 100}, '73': {'count': 100}, '58': {'count': 100}, '11': {'count': 100}, '25': {'count': 100}, '37': {'count': 100}, '63': {'count': 100}, '24': {'count': 100}, '22': {'count': 100}, '17': {'count': 100}, '4': {'count': 100}, '6': {'count': 100}, '9': {'count': 100}, '57': {'count': 100}, '2': {'count': 100}, '32': {'count': 100}, '52': {'count': 100}, '42': {'count': 100}, '77': {'count': 100}, '27': {'count': 100}, '65': {'count': 100}, '7': {'count': 100}, '35': {'count': 100}, '82': {'count': 100}, '66': {'count': 100}, '90': {'count': 100}, '67': {'count': 100}, '91': {'count': 100}, '10': {'count': 100}, '78': {'count': 100}, '54': {'count': 100}, '89': {'count': 100}, '18': {'count': 100}, '13': {'count': 100}, '50': {'count': 100}, '26': {'count': 100}, '83': {'count': 100}, '47': {'count': 100}, '95': {'count': 100}, '76': {'count': 100}, '59': {'count': 100}, '85': {'count': 100}, '19': {'count': 100}, '46': {'count': 100}, '1': {'count': 100}, '74': {'count': 100}, '60': {'count': 100}, '64': {'count': 100}, '45': {'count': 100}, '36': {'count': 100}, '87': {'count': 100}, '30': {'count': 100}, '99': {'count': 100}, '80': {'count': 100}, '28': {'count': 100}, '98': {'count': 100}, '12': {'count': 100}, '94': {'count': 100}, '68': {'count': 100}, '44': {'count': 100}, '31': {'count': 100}, '79': {'count': 100}, '34': {'count': 100}, '55': {'count': 100}, '62': {'count': 100}, '96': {'count': 100}, '84': {'count': 100}, '38': {'count': 100}, '86': {'count': 100}, '5': {'count': 100}, '48': {'count': 100}, '3': {'count': 100}, '88': {'count': 100}}}}
CIFAR100Clustering (Alex Krizhevsky, 2009) ['eng'] ImageClustering i2t [Web] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 100, 'min_image_width': 32, 'average_image_width': 32.0, 'max_image_width': 32, 'min_image_height': 32, 'average_image_height': 32.0, 'max_image_height': 32, 'labels': {'49': {'count': 100}, '33': {'count': 100}, '72': {'count': 100}, '51': {'count': 100}, '71': {'count': 100}, '92': {'count': 100}, '15': {'count': 100}, '14': {'count': 100}, '23': {'count': 100}, '0': {'count': 100}, '75': {'count': 100}, '81': {'count': 100}, '69': {'count': 100}, '40': {'count': 100}, '43': {'count': 100}, '97': {'count': 100}, '70': {'count': 100}, '53': {'count': 100}, '29': {'count': 100}, '21': {'count': 100}, '16': {'count': 100}, '39': {'count': 100}, '8': {'count': 100}, '20': {'count': 100}, '61': {'count': 100}, '41': {'count': 100}, '93': {'count': 100}, '56': {'count': 100}, '73': {'count': 100}, '58': {'count': 100}, '11': {'count': 100}, '25': {'count': 100}, '37': {'count': 100}, '63': {'count': 100}, '24': {'count': 100}, '22': {'count': 100}, '17': {'count': 100}, '4': {'count': 100}, '6': {'count': 100}, '9': {'count': 100}, '57': {'count': 100}, '2': {'count': 100}, '32': {'count': 100}, '52': {'count': 100}, '42': {'count': 100}, '77': {'count': 100}, '27': {'count': 100}, '65': {'count': 100}, '7': {'count': 100}, '35': {'count': 100}, '82': {'count': 100}, '66': {'count': 100}, '90': {'count': 100}, '67': {'count': 100}, '91': {'count': 100}, '10': {'count': 100}, '78': {'count': 100}, '54': {'count': 100}, '89': {'count': 100}, '18': {'count': 100}, '13': {'count': 100}, '50': {'count': 100}, '26': {'count': 100}, '83': {'count': 100}, '47': {'count': 100}, '95': {'count': 100}, '76': {'count': 100}, '59': {'count': 100}, '85': {'count': 100}, '19': {'count': 100}, '46': {'count': 100}, '1': {'count': 100}, '74': {'count': 100}, '60': {'count': 100}, '64': {'count': 100}, '45': {'count': 100}, '36': {'count': 100}, '87': {'count': 100}, '30': {'count': 100}, '99': {'count': 100}, '80': {'count': 100}, '28': {'count': 100}, '98': {'count': 100}, '12': {'count': 100}, '94': {'count': 100}, '68': {'count': 100}, '44': {'count': 100}, '31': {'count': 100}, '79': {'count': 100}, '34': {'count': 100}, '55': {'count': 100}, '62': {'count': 100}, '96': {'count': 100}, '84': {'count': 100}, '38': {'count': 100}, '86': {'count': 100}, '5': {'count': 100}, '48': {'count': 100}, '3': {'count': 100}, '88': {'count': 100}}}}
CIFAR100ZeroShot (Alex Krizhevsky, 2009) ['eng'] ZeroShotClassification i2t [Web] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 100, 'min_image_width': 32, 'average_image_width': 32.0, 'max_image_width': 32, 'min_image_height': 32, 'average_image_height': 32.0, 'max_image_height': 32, 'min_label_text_length': 17, 'average_label_text_length': 20.24, 'max_label_text_length': 27, 'labels': {'49': {'count': 100}, '33': {'count': 100}, '72': {'count': 100}, '51': {'count': 100}, '71': {'count': 100}, '92': {'count': 100}, '15': {'count': 100}, '14': {'count': 100}, '23': {'count': 100}, '0': {'count': 100}, '75': {'count': 100}, '81': {'count': 100}, '69': {'count': 100}, '40': {'count': 100}, '43': {'count': 100}, '97': {'count': 100}, '70': {'count': 100}, '53': {'count': 100}, '29': {'count': 100}, '21': {'count': 100}, '16': {'count': 100}, '39': {'count': 100}, '8': {'count': 100}, '20': {'count': 100}, '61': {'count': 100}, '41': {'count': 100}, '93': {'count': 100}, '56': {'count': 100}, '73': {'count': 100}, '58': {'count': 100}, '11': {'count': 100}, '25': {'count': 100}, '37': {'count': 100}, '63': {'count': 100}, '24': {'count': 100}, '22': {'count': 100}, '17': {'count': 100}, '4': {'count': 100}, '6': {'count': 100}, '9': {'count': 100}, '57': {'count': 100}, '2': {'count': 100}, '32': {'count': 100}, '52': {'count': 100}, '42': {'count': 100}, '77': {'count': 100}, '27': {'count': 100}, '65': {'count': 100}, '7': {'count': 100}, '35': {'count': 100}, '82': {'count': 100}, '66': {'count': 100}, '90': {'count': 100}, '67': {'count': 100}, '91': {'count': 100}, '10': {'count': 100}, '78': {'count': 100}, '54': {'count': 100}, '89': {'count': 100}, '18': {'count': 100}, '13': {'count': 100}, '50': {'count': 100}, '26': {'count': 100}, '83': {'count': 100}, '47': {'count': 100}, '95': {'count': 100}, '76': {'count': 100}, '59': {'count': 100}, '85': {'count': 100}, '19': {'count': 100}, '46': {'count': 100}, '1': {'count': 100}, '74': {'count': 100}, '60': {'count': 100}, '64': {'count': 100}, '45': {'count': 100}, '36': {'count': 100}, '87': {'count': 100}, '30': {'count': 100}, '99': {'count': 100}, '80': {'count': 100}, '28': {'count': 100}, '98': {'count': 100}, '12': {'count': 100}, '94': {'count': 100}, '68': {'count': 100}, '44': {'count': 100}, '31': {'count': 100}, '79': {'count': 100}, '34': {'count': 100}, '55': {'count': 100}, '62': {'count': 100}, '96': {'count': 100}, '84': {'count': 100}, '38': {'count': 100}, '86': {'count': 100}, '5': {'count': 100}, '48': {'count': 100}, '3': {'count': 100}, '88': {'count': 100}}}}
CIFAR10Clustering (Alex Krizhevsky, 2009) ['eng'] ImageClustering i2i [Web] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 10, 'min_image_width': 32, 'average_image_width': 32.0, 'max_image_width': 32, 'min_image_height': 32, 'average_image_height': 32.0, 'max_image_height': 32, 'labels': {'3': {'count': 1000}, '8': {'count': 1000}, '0': {'count': 1000}, '6': {'count': 1000}, '1': {'count': 1000}, '9': {'count': 1000}, '5': {'count': 1000}, '7': {'count': 1000}, '4': {'count': 1000}, '2': {'count': 1000}}}}
CIFAR10ZeroShot (Alex Krizhevsky, 2009) ['eng'] ZeroShotClassification i2t [Web] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 10, 'min_image_width': 32, 'average_image_width': 32.0, 'max_image_width': 32, 'min_image_height': 32, 'average_image_height': 32.0, 'max_image_height': 32, 'min_label_text_length': 17, 'average_label_text_length': 19.0, 'max_label_text_length': 24, 'labels': {'3': {'count': 1000}, '8': {'count': 1000}, '0': {'count': 1000}, '6': {'count': 1000}, '1': {'count': 1000}, '9': {'count': 1000}, '5': {'count': 1000}, '7': {'count': 1000}, '4': {'count': 1000}, '2': {'count': 1000}}}}
CIRRIT2IRetrieval (Liu et al., 2021) ['eng'] Any2AnyRetrieval it2i [Encyclopaedic] {'test': 25721} {'test': {'number_of_characters': 244365, 'num_samples': 25721, 'num_queries': 4170, 'num_documents': 21551, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 21551, 'min_query_length': 10, 'average_query_length': 58.6, 'max_query_length': 244, 'unique_queries': 4152, 'num_query_images': 4170, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.01, 'max_relevant_docs_per_query': 3, 'unique_relevant_docs': 2199}}
CLEVRCountZeroShot (Johnson et al., 2017) ['eng'] ZeroShotClassification i2t [Constructed] {'test': 15000} {'test': {'num_samples': 15000, 'unique_num_labels': 8, 'min_image_width': 480, 'average_image_width': 480.0, 'max_image_width': 480, 'min_image_height': 320, 'average_image_height': 320.0, 'max_image_height': 320, 'min_label_text_length': 24, 'average_label_text_length': 25.12, 'max_label_text_length': 26, 'labels': {'0': {'count': 1898}, '2': {'count': 1843}, '1': {'count': 1925}, '5': {'count': 1941}, '6': {'count': 1877}, '4': {'count': 1808}, '3': {'count': 1852}, '7': {'count': 1856}}}}
CLEVRZeroShot (Johnson et al., 2017) ['eng'] ZeroShotClassification i2t [Constructed] {'test': 15000} {'test': {'num_samples': 15000, 'unique_num_labels': 6, 'min_image_width': 480, 'average_image_width': 480.0, 'max_image_width': 480, 'min_image_height': 320, 'average_image_height': 320.0, 'max_image_height': 320, 'min_label_text_length': 8, 'average_label_text_length': 14.67, 'max_label_text_length': 20, 'labels': {'2': {'count': 3381}, '3': {'count': 2370}, '5': {'count': 1220}, '1': {'count': 3676}, '4': {'count': 1357}, '0': {'count': 2996}}}}
CLSClusteringP2P.v2 (Yudong Li, 2022) ['cmn'] Clustering p2p [Academic, Written] None None
CLSClusteringS2S.v2 (Yudong Li, 2022) ['cmn'] Clustering s2s [Academic, Written] None None
CMedQAv1-reranking (Zhang et al., 2017) ['cmn'] Reranking s2s [Medical, Written] None None
CMedQAv2-reranking (S. Zhang, 2018) ['cmn'] Reranking s2s [Medical, Written] None None
COIRCodeSearchNetRetrieval (Husain et al., 2019) ['go', 'java', 'javascript', 'php', 'python', 'ruby'] Retrieval p2p [Programming, Written] {'test': 1056326} {'test': {'number_of_characters': 36843313, 'num_samples': 1056326, 'num_queries': 52561, 'num_documents': 1003765, 'min_document_length': 54, 'average_document_length': 34.71, 'max_document_length': 334374, 'unique_documents': 1003765, 'min_query_length': 2, 'average_query_length': 38.19, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 14574651, 'num_samples': 295228, 'num_queries': 14918, 'num_documents': 280310, 'min_document_length': 95, 'average_document_length': 49.99, 'max_document_length': 14008, 'unique_documents': 280310, 'min_query_length': 2, 'average_query_length': 37.58, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14918}, 'javascript': {'number_of_characters': 2587540, 'num_samples': 68145, 'num_queries': 3291, 'num_documents': 64854, 'min_document_length': 87, 'average_document_length': 37.9, 'max_document_length': 334374, 'unique_documents': 64854, 'min_query_length': 2, 'average_query_length': 39.41, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 3641108, 'num_samples': 190562, 'num_queries': 8122, 'num_documents': 182440, 'min_document_length': 54, 'average_document_length': 17.96, 'max_document_length': 5280, 'unique_documents': 182440, 'min_query_length': 2, 'average_query_length': 44.92, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 629446, 'num_samples': 28831, 'num_queries': 1261, 'num_documents': 27570, 'min_document_length': 83, 'average_document_length': 20.83, 'max_document_length': 3992, 'unique_documents': 27570, 'min_query_length': 2, 'average_query_length': 43.73, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 6791137, 'num_samples': 191821, 'num_queries': 10955, 'num_documents': 180866, 'min_document_length': 77, 'average_document_length': 35.55, 'max_document_length': 7615, 'unique_documents': 180866, 'min_query_length': 2, 'average_query_length': 33.02, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 8619431, 'num_samples': 281739, 'num_queries': 14014, 'num_documents': 267725, 'min_document_length': 94, 'average_document_length': 30.2, 'max_document_length': 4904, 'unique_documents': 267725, 'min_query_length': 2, 'average_query_length': 38.21, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}}
CPUSpeedTask ['eng'] Speed s2s [Fiction, Written] None None
CQADupstack-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackAndroid-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackAndroidRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Non-fiction, Programming, Web, Written] None None
CQADupstackAndroidRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackEnglish-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackEnglishRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Written] None None
CQADupstackEnglishRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackGaming-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackGamingRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Web, Written] None None
CQADupstackGamingRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackGis-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackGisRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackGisRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackMathematica-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackMathematicaRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Academic, Non-fiction, Written] None None
CQADupstackMathematicaRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackPhysics-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackPhysicsRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Academic, Non-fiction, Written] None None
CQADupstackPhysicsRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackProgrammers-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackProgrammersRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Non-fiction, Programming, Written] None None
CQADupstackProgrammersRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval None [Academic, Non-fiction, Programming, Web, Written] None None
CQADupstackRetrieval-Fa ['fas'] Retrieval None [Web] None None
CQADupstackStats-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackStatsRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Academic, Non-fiction, Written] None None
CQADupstackStatsRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackTex-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackTexRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackTexRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackUnix-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackUnixRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Programming, Web, Written] None None
CQADupstackUnixRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackWebmasters-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackWebmastersRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Web, Written] None None
CQADupstackWebmastersRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CQADupstackWordpress-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
CQADupstackWordpressRetrieval (Hoogeveen et al., 2015) ['eng'] Retrieval s2p [Programming, Web, Written] None None
CQADupstackWordpressRetrieval-Fa ['fas'] Retrieval s2p [Web] None None
CSFDCZMovieReviewSentimentClassification (Michal Štefánik, 2023) ['ces'] Classification s2s [Reviews, Written] None None
CSFDSKMovieReviewSentimentClassification (Michal Štefánik, 2023) ['slk'] Classification s2s [Reviews, Written] None None
CTKFactsNLI (Ullrich et al., 2023) ['ces'] PairClassification s2s [News, Written] None None
CUADAffiliateLicenseLicenseeLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADAffiliateLicenseLicensorLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADAntiAssignmentLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADAuditRightsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADCapOnLiabilityLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADChangeOfControlLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADCompetitiveRestrictionExceptionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADCovenantNotToSueLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADEffectiveDateLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADExclusivityLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADExpirationDateLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADGoverningLawLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADIPOwnershipAssignmentLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADInsuranceLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADIrrevocableOrPerpetualLicenseLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADJointIPOwnershipLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADLicenseGrantLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADLiquidatedDamagesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADMinimumCommitmentLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADMostFavoredNationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADNoSolicitOfCustomersLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADNoSolicitOfEmployeesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADNonCompeteLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADNonDisparagementLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADNonTransferableLicenseLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADNoticePeriodToTerminateRenewalLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADPostTerminationServicesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADPriceRestrictionsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADRenewalTermLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADRevenueProfitSharingLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADRofrRofoRofnLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADSourceCodeEscrowLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADTerminationForConvenienceLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADThirdPartyBeneficiaryLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADUncappedLiabilityLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADUnlimitedAllYouCanEatLicenseLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADVolumeRestrictionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUADWarrantyDurationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CUB200I2IRetrieval (Welinder et al., 2010) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 11588} {'test': {'number_of_characters': 0, 'num_samples': 11588, 'num_queries': 5794, 'num_documents': 5794, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 5794, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 5794, 'min_relevant_docs_per_query': 10, 'average_relevant_docs_per_query': 28.26, 'max_relevant_docs_per_query': 29, 'unique_relevant_docs': 5794}}
CUREv1 ['eng', 'fra', 'spa'] Retrieval s2p [Academic, Medical, Written] None None
CVBenchCount (Tong et al., 2024) ['eng'] VisionCentric it2t [Academic] {'test': 805} {'test': {'number_of_characters': 27095, 'num_samples': 805, 'num_queries': 788, 'num_documents': 17, 'min_document_length': 1, 'average_document_length': 1.41, 'max_document_length': 2, 'unique_documents': 17, 'min_document_image_width': 0, 'average_document_image_width': 0, 'max_document_image_width': 0, 'min_document_image_height': 0, 'average_document_image_height': 0, 'max_document_image_height': 0, 'num_document_images': 0, 'min_query_length': 30, 'average_query_length': 34.35, 'max_query_length': 45, 'unique_queries': 197, 'num_query_images': 788, 'min_query_image_width': 181, 'average_query_image_width': 631.31, 'max_query_image_width': 2200, 'min_query_image_height': 200, 'average_query_image_height': 757.68, 'max_query_image_height': 2200, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 17}}
CVBenchDepth (Tong et al., 2024) ['eng'] VisionCentric it2t [Academic] {'test': 669} {'test': {'number_of_characters': 82092, 'num_samples': 669, 'num_queries': 600, 'num_documents': 69, 'min_document_length': 3, 'average_document_length': 6.75, 'max_document_length': 17, 'unique_documents': 69, 'min_document_image_width': 0, 'average_document_image_width': 0, 'max_document_image_width': 0, 'min_document_image_height': 0, 'average_document_image_height': 0, 'max_document_image_height': 0, 'num_document_images': 0, 'min_query_length': 130, 'average_query_length': 136.04, 'max_query_length': 147, 'unique_queries': 279, 'num_query_images': 600, 'min_query_image_width': 427, 'average_query_image_width': 715.99, 'max_query_image_width': 900, 'min_query_image_height': 561, 'average_query_image_height': 1090.96, 'max_query_image_height': 1600, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 69}}
CVBenchDistance (Tong et al., 2024) ['eng'] VisionCentric it2t [Academic] {'test': 656} {'test': {'number_of_characters': 127804, 'num_samples': 656, 'num_queries': 600, 'num_documents': 56, 'min_document_length': 3, 'average_document_length': 6.46, 'max_document_length': 12, 'unique_documents': 56, 'min_document_image_width': 0, 'average_document_image_width': 0, 'max_document_image_width': 0, 'min_document_image_height': 0, 'average_document_image_height': 0, 'max_document_image_height': 0, 'num_document_images': 0, 'min_query_length': 204, 'average_query_length': 212.4, 'max_query_length': 223, 'unique_queries': 381, 'num_query_images': 600, 'min_query_image_width': 427, 'average_query_image_width': 721.0, 'max_query_image_width': 900, 'min_query_image_height': 561, 'average_query_image_height': 1099.29, 'max_query_image_height': 1600, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 56}}
CVBenchRelation (Tong et al., 2024) ['eng'] VisionCentric it2t [Academic] {'test': 654} {'test': {'number_of_characters': 117967, 'num_samples': 654, 'num_queries': 650, 'num_documents': 4, 'min_document_length': 4, 'average_document_length': 4.75, 'max_document_length': 5, 'unique_documents': 4, 'min_document_image_width': 0, 'average_document_image_width': 0, 'max_document_image_width': 0, 'min_document_image_height': 0, 'average_document_image_height': 0, 'max_document_image_height': 0, 'num_document_images': 0, 'min_query_length': 132, 'average_query_length': 181.46, 'max_query_length': 224, 'unique_queries': 580, 'num_query_images': 650, 'min_query_image_width': 190, 'average_query_image_width': 448.45, 'max_query_image_width': 2200, 'min_query_image_height': 189, 'average_query_image_height': 546.32, 'max_query_image_height': 2200, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 4}}
Caltech101 (Li Fei-Fei, 2004) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 6084} {'test': {'num_samples': 6084, 'unique_num_labels': 102, 'min_image_width': 80, 'average_image_width': 311.72, 'max_image_width': 3481, 'min_image_height': 101, 'average_image_height': 241.84, 'max_image_height': 3999, 'labels': {'4': {'count': 437}, '37': {'count': 405}, '38': {'count': 405}, '57': {'count': 170}, '66': {'count': 768}, '0': {'count': 25}, '1': {'count': 770}, '2': {'count': 12}, '3': {'count': 12}, '5': {'count': 17}, '6': {'count': 24}, '7': {'count': 16}, '8': {'count': 3}, '9': {'count': 98}, '10': {'count': 68}, '11': {'count': 13}, '12': {'count': 55}, '13': {'count': 61}, '14': {'count': 20}, '15': {'count': 13}, '16': {'count': 93}, '17': {'count': 17}, '18': {'count': 29}, '19': {'count': 32}, '20': {'count': 77}, '22': {'count': 39}, '23': {'count': 43}, '24': {'count': 40}, '25': {'count': 20}, '26': {'count': 21}, '27': {'count': 27}, '28': {'count': 37}, '29': {'count': 22}, '30': {'count': 35}, '31': {'count': 38}, '32': {'count': 45}, '33': {'count': 34}, '34': {'count': 23}, '35': {'count': 34}, '36': {'count': 55}, '39': {'count': 37}, '40': {'count': 37}, '41': {'count': 15}, '42': {'count': 4}, '43': {'count': 4}, '44': {'count': 21}, '45': {'count': 69}, '46': {'count': 70}, '47': {'count': 12}, '48': {'count': 24}, '49': {'count': 58}, '50': {'count': 50}, '51': {'count': 1}, '52': {'count': 34}, '53': {'count': 56}, '54': {'count': 84}, '55': {'count': 31}, '56': {'count': 51}, '58': {'count': 48}, '59': {'count': 11}, '60': {'count': 36}, '61': {'count': 13}, '62': {'count': 10}, '63': {'count': 57}, '64': {'count': 2}, '65': {'count': 46}, '67': {'count': 25}, '68': {'count': 5}, '69': {'count': 9}, '70': {'count': 17}, '71': {'count': 8}, '72': {'count': 15}, '73': {'count': 23}, '74': {'count': 4}, '75': {'count': 27}, '76': {'count': 52}, '77': {'count': 29}, '78': {'count': 19}, '79': {'count': 10}, '80': {'count': 33}, '81': {'count': 9}, '82': {'count': 54}, '83': {'count': 27}, '84': {'count': 5}, '85': {'count': 34}, '86': {'count': 15}, '87': {'count': 56}, '88': {'count': 29}, '89': {'count': 34}, '90': {'count': 5}, '91': {'count': 55}, '92': {'count': 19}, '93': {'count': 56}, '94': {'count': 45}, '95': {'count': 209}, '96': {'count': 7}, '97': {'count': 29}, '98': {'count': 4}, '99': {'count': 26}, '100': {'count': 9}, '101': {'count': 30}, '21': {'count': 17}}}}
Caltech101ZeroShot (Li Fei-Fei, 2004) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 1986} {'test': {'num_samples': 1986, 'unique_num_labels': 63, 'min_image_width': 105, 'average_image_width': 277.19, 'max_image_width': 300, 'min_image_height': 114, 'average_image_height': 255.33, 'max_image_height': 300, 'min_label_text_length': 17, 'average_label_text_length': 21.88, 'max_label_text_length': 31, 'labels': {'36': {'count': 55}, '39': {'count': 37}, '40': {'count': 37}, '41': {'count': 15}, '42': {'count': 4}, '43': {'count': 4}, '44': {'count': 21}, '45': {'count': 69}, '46': {'count': 70}, '47': {'count': 12}, '48': {'count': 24}, '49': {'count': 58}, '50': {'count': 50}, '51': {'count': 1}, '52': {'count': 34}, '53': {'count': 56}, '54': {'count': 84}, '55': {'count': 31}, '56': {'count': 51}, '58': {'count': 48}, '59': {'count': 11}, '60': {'count': 36}, '61': {'count': 13}, '62': {'count': 10}, '63': {'count': 57}, '64': {'count': 2}, '65': {'count': 46}, '67': {'count': 25}, '68': {'count': 5}, '69': {'count': 9}, '70': {'count': 17}, '71': {'count': 8}, '72': {'count': 15}, '73': {'count': 23}, '74': {'count': 4}, '75': {'count': 27}, '76': {'count': 52}, '77': {'count': 29}, '78': {'count': 19}, '79': {'count': 10}, '80': {'count': 33}, '81': {'count': 9}, '82': {'count': 54}, '83': {'count': 27}, '84': {'count': 5}, '85': {'count': 34}, '86': {'count': 15}, '87': {'count': 56}, '88': {'count': 29}, '89': {'count': 34}, '90': {'count': 5}, '91': {'count': 55}, '92': {'count': 19}, '93': {'count': 56}, '94': {'count': 45}, '95': {'count': 209}, '96': {'count': 7}, '97': {'count': 29}, '98': {'count': 4}, '99': {'count': 26}, '100': {'count': 9}, '101': {'count': 30}, '21': {'count': 17}}}}
CanadaTaxCourtOutcomesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CataloniaTweetClassification ['cat', 'spa'] Classification s2s [Government, Social, Written] None None
ChemHotpotQARetrieval (Kasmaee et al., 2024) ['eng'] Retrieval s2p [Chemistry] None None
ChemNQRetrieval (Kasmaee et al., 2024) ['eng'] Retrieval s2p [Chemistry] None None
ClimateFEVER (Thomas Diggelmann, 2021) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
ClimateFEVER-Fa ['fas'] Retrieval s2p [Web] None None
ClimateFEVER-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Encyclopaedic, Written] None None
ClimateFEVER.v2 (Thomas Diggelmann, 2021) ['eng'] Retrieval s2p [Academic, Written] None None
ClimateFEVERHardNegatives (Thomas Diggelmann, 2021) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
ClusTREC-Covid ['eng'] Clustering p2p [Academic, Medical, Written] {'test': 4568} {'test': {'num_samples': 4568, 'number_of_characters': 2977845, 'min_text_length': 14, 'average_text_length': 651.89, 'max_text_length': 8364, 'min_labels_per_text': 6, 'average_labels_per_text': 1.0, 'max_labels_per_text': 100, 'unique_labels': 50, 'labels': {'coronavirus origin': {'count': 100}, 'coronavirus response to weather changes': {'count': 100}, 'coronavirus immunity': {'count': 78}, 'how do people die from the coronavirus': {'count': 100}, 'animal models of COVID-19': {'count': 100}, 'coronavirus test rapid testing': {'count': 100}, 'serological tests for coronavirus': {'count': 100}, 'coronavirus under reporting': {'count': 100}, 'coronavirus in Canada': {'count': 92}, 'coronavirus social distancing impact': {'count': 100}, 'coronavirus hospital rationing': {'count': 100}, 'coronavirus quarantine': {'count': 100}, 'how does coronavirus spread': {'count': 100}, 'coronavirus super spreaders': {'count': 98}, 'coronavirus outside body': {'count': 34}, 'how long does coronavirus survive on surfaces': {'count': 74}, 'coronavirus clinical trials': {'count': 100}, 'masks prevent coronavirus': {'count': 100}, 'what alcohol sanitizer kills coronavirus': {'count': 64}, 'coronavirus and ACE inhibitors': {'count': 100}, 'coronavirus mortality': {'count': 100}, 'coronavirus heart impacts': {'count': 100}, 'coronavirus hypertension': {'count': 74}, 'coronavirus diabetes': {'count': 100}, 'coronavirus biomarkers': {'count': 100}, 'coronavirus early symptoms': {'count': 100}, 'coronavirus asymptomatic': {'count': 100}, 'coronavirus hydroxychloroquine': {'count': 100}, 'coronavirus drug repurposing': {'count': 100}, 'coronavirus remdesivir': {'count': 100}, 'difference between coronavirus and flu': {'count': 100}, 'coronavirus subtypes': {'count': 6}, 'coronavirus vaccine candidates': {'count': 36}, 'coronavirus recovery': {'count': 100}, 'coronavirus public datasets': {'count': 100}, 'SARS-CoV-2 spike structure': {'count': 100}, 'SARS-CoV-2 phylogenetic analysis': {'count': 100}, 'COVID inflammatory response': {'count': 100}, 'COVID-19 cytokine storm': {'count': 100}, 'coronavirus mutations': {'count': 100}, 'COVID-19 in African-Americans': {'count': 100}, 'Vitamin D and COVID-19': {'count': 100}, 'violence during pandemic': {'count': 100}, 'impact of masks on coronavirus transmission': {'count': 100}, 'coronavirus mental health impact': {'count': 100}, 'dexamethasone coronavirus': {'count': 92}, 'COVID-19 outcomes in children': {'count': 100}, 'school reopening coronavirus': {'count': 100}, 'post-infection COVID-19 immunity': {'count': 88}, 'mRNA vaccine coronavirus': {'count': 32}}, 'hf_subset_descriptive_stats': {'title and abstract': {'num_samples': 2284, 'number_of_characters': 2755462, 'min_text_length': 14, 'average_text_length': 1206.42, 'max_text_length': 8364, 'min_labels_per_text': 3, 'average_labels_per_text': 1.0, 'max_labels_per_text': 50, 'unique_labels': 50, 'labels': {'coronavirus origin': {'count': 50}, 'coronavirus response to weather changes': {'count': 50}, 'coronavirus immunity': {'count': 39}, 'how do people die from the coronavirus': {'count': 50}, 'animal models of COVID-19': {'count': 50}, 'coronavirus test rapid testing': {'count': 50}, 'serological tests for coronavirus': {'count': 50}, 'coronavirus under reporting': {'count': 50}, 'coronavirus in Canada': {'count': 46}, 'coronavirus social distancing impact': {'count': 50}, 'coronavirus hospital rationing': {'count': 50}, 'coronavirus quarantine': {'count': 50}, 'how does coronavirus spread': {'count': 50}, 'coronavirus super spreaders': {'count': 49}, 'coronavirus outside body': {'count': 17}, 'how long does coronavirus survive on surfaces': {'count': 37}, 'coronavirus clinical trials': {'count': 50}, 'masks prevent coronavirus': {'count': 50}, 'what alcohol sanitizer kills coronavirus': {'count': 32}, 'coronavirus and ACE inhibitors': {'count': 50}, 'coronavirus mortality': {'count': 50}, 'coronavirus heart impacts': {'count': 50}, 'coronavirus hypertension': {'count': 37}, 'coronavirus diabetes': {'count': 50}, 'coronavirus biomarkers': {'count': 50}, 'coronavirus early symptoms': {'count': 50}, 'coronavirus asymptomatic': {'count': 50}, 'coronavirus hydroxychloroquine': {'count': 50}, 'coronavirus drug repurposing': {'count': 50}, 'coronavirus remdesivir': {'count': 50}, 'difference between coronavirus and flu': {'count': 50}, 'coronavirus subtypes': {'count': 3}, 'coronavirus vaccine candidates': {'count': 18}, 'coronavirus recovery': {'count': 50}, 'coronavirus public datasets': {'count': 50}, 'SARS-CoV-2 spike structure': {'count': 50}, 'SARS-CoV-2 phylogenetic analysis': {'count': 50}, 'COVID inflammatory response': {'count': 50}, 'COVID-19 cytokine storm': {'count': 50}, 'coronavirus mutations': {'count': 50}, 'COVID-19 in African-Americans': {'count': 50}, 'Vitamin D and COVID-19': {'count': 50}, 'violence during pandemic': {'count': 50}, 'impact of masks on coronavirus transmission': {'count': 50}, 'coronavirus mental health impact': {'count': 50}, 'dexamethasone coronavirus': {'count': 46}, 'COVID-19 outcomes in children': {'count': 50}, 'school reopening coronavirus': {'count': 50}, 'post-infection COVID-19 immunity': {'count': 44}, 'mRNA vaccine coronavirus': {'count': 16}}}, 'title': {'num_samples': 2284, 'number_of_characters': 222383, 'min_text_length': 14, 'average_text_length': 97.37, 'max_text_length': 348, 'min_labels_per_text': 3, 'average_labels_per_text': 1.0, 'max_labels_per_text': 50, 'unique_labels': 50, 'labels': {'coronavirus origin': {'count': 50}, 'coronavirus response to weather changes': {'count': 50}, 'coronavirus immunity': {'count': 39}, 'how do people die from the coronavirus': {'count': 50}, 'animal models of COVID-19': {'count': 50}, 'coronavirus test rapid testing': {'count': 50}, 'serological tests for coronavirus': {'count': 50}, 'coronavirus under reporting': {'count': 50}, 'coronavirus in Canada': {'count': 46}, 'coronavirus social distancing impact': {'count': 50}, 'coronavirus hospital rationing': {'count': 50}, 'coronavirus quarantine': {'count': 50}, 'how does coronavirus spread': {'count': 50}, 'coronavirus super spreaders': {'count': 49}, 'coronavirus outside body': {'count': 17}, 'how long does coronavirus survive on surfaces': {'count': 37}, 'coronavirus clinical trials': {'count': 50}, 'masks prevent coronavirus': {'count': 50}, 'what alcohol sanitizer kills coronavirus': {'count': 32}, 'coronavirus and ACE inhibitors': {'count': 50}, 'coronavirus mortality': {'count': 50}, 'coronavirus heart impacts': {'count': 50}, 'coronavirus hypertension': {'count': 37}, 'coronavirus diabetes': {'count': 50}, 'coronavirus biomarkers': {'count': 50}, 'coronavirus early symptoms': {'count': 50}, 'coronavirus asymptomatic': {'count': 50}, 'coronavirus hydroxychloroquine': {'count': 50}, 'coronavirus drug repurposing': {'count': 50}, 'coronavirus remdesivir': {'count': 50}, 'difference between coronavirus and flu': {'count': 50}, 'coronavirus subtypes': {'count': 3}, 'coronavirus vaccine candidates': {'count': 18}, 'coronavirus recovery': {'count': 50}, 'coronavirus public datasets': {'count': 50}, 'SARS-CoV-2 spike structure': {'count': 50}, 'SARS-CoV-2 phylogenetic analysis': {'count': 50}, 'COVID inflammatory response': {'count': 50}, 'COVID-19 cytokine storm': {'count': 50}, 'coronavirus mutations': {'count': 50}, 'COVID-19 in African-Americans': {'count': 50}, 'Vitamin D and COVID-19': {'count': 50}, 'violence during pandemic': {'count': 50}, 'impact of masks on coronavirus transmission': {'count': 50}, 'coronavirus mental health impact': {'count': 50}, 'dexamethasone coronavirus': {'count': 46}, 'COVID-19 outcomes in children': {'count': 50}, 'school reopening coronavirus': {'count': 50}, 'post-infection COVID-19 immunity': {'count': 44}, 'mRNA vaccine coronavirus': {'count': 16}}}}}}
CmedqaRetrieval ['cmn'] Retrieval s2p [Medical, Written] None None
Cmnli ['cmn'] PairClassification s2s None None
CodeEditSearchRetrieval (Niklas Muennighoff, 2023) ['c', 'c++', 'go', 'java', 'javascript', 'php', 'python', 'ruby', 'rust', 'scala', 'shell', 'swift', 'typescript'] Retrieval p2p [Programming, Written] {'train': 26000} {'train': {'number_of_characters': 935841, 'num_samples': 26000, 'num_queries': 13000, 'num_documents': 13000, 'min_document_length': 18, 'average_document_length': 70.99, 'max_document_length': 2532, 'unique_documents': 13000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 13000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 70519, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 69.52, 'max_document_length': 1811, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 57880, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 56.88, 'max_document_length': 601, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'typescript': {'number_of_characters': 61092, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 60.09, 'max_document_length': 659, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 71797, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 70.8, 'max_document_length': 1529, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 67900, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 66.9, 'max_document_length': 751, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 63984, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 23, 'average_document_length': 62.98, 'max_document_length': 807, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 62927, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 21, 'average_document_length': 61.93, 'max_document_length': 766, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c': {'number_of_characters': 98588, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 20, 'average_document_length': 97.59, 'max_document_length': 1672, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'c++': {'number_of_characters': 115480, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 114.48, 'max_document_length': 1856, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'rust': {'number_of_characters': 68503, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 67.5, 'max_document_length': 2532, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'swift': {'number_of_characters': 58279, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 19, 'average_document_length': 57.28, 'max_document_length': 727, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'scala': {'number_of_characters': 65833, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 22, 'average_document_length': 64.83, 'max_document_length': 685, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'shell': {'number_of_characters': 73059, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 18, 'average_document_length': 72.06, 'max_document_length': 813, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}}
CodeFeedbackMT (Tianyu Zheng, 2024) ['eng'] Retrieval p2p [Programming, Written] {'test': 79660} {'test': {'number_of_characters': 156266302, 'num_samples': 79660, 'num_queries': 13277, 'num_documents': 66383, 'min_document_length': 127, 'average_document_length': 885.13, 'max_document_length': 32432, 'unique_documents': 66383, 'min_query_length': 2, 'average_query_length': 7344.18, 'max_query_length': 9403, 'unique_queries': 13277, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 13277}}
CodeFeedbackST (Xiangyang Li, 2024) ['eng'] Retrieval p2p [Programming, Written] {'test': 187832} {'test': {'number_of_characters': 260957682, 'num_samples': 187832, 'num_queries': 31306, 'num_documents': 156526, 'min_document_length': 26, 'average_document_length': 144.85, 'max_document_length': 13851, 'unique_documents': 156526, 'min_query_length': 1, 'average_query_length': 7611.46, 'max_query_length': 11354, 'unique_queries': 31306, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 31306}}
CodeRAGLibraryDocumentationSolutions (Zora Zhiruo Wang, 2024) ['python'] Reranking s2s [Programming] None None
CodeRAGOnlineTutorials (Zora Zhiruo Wang, 2024) ['python'] Reranking s2s [Programming] None None
CodeRAGProgrammingSolutions (Zora Zhiruo Wang, 2024) ['python'] Reranking s2s [Programming] None None
CodeRAGStackoverflowPosts (Zora Zhiruo Wang, 2024) ['python'] Reranking s2s [Programming] None None
CodeSearchNetCCRetrieval (Xiangyang Li, 2024) ['go', 'java', 'javascript', 'php', 'python', 'ruby'] Retrieval p2p [Programming, Written] {'test': 1058035} {'test': {'number_of_characters': 22407915, 'num_samples': 1058035, 'num_queries': 52561, 'num_documents': 1005474, 'min_document_length': 23, 'average_document_length': 20.29, 'max_document_length': 214210, 'unique_documents': 1005474, 'min_query_length': 2, 'average_query_length': 38.26, 'max_query_length': 2, 'unique_queries': 52561, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 52561, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 8792958, 'num_samples': 295570, 'num_queries': 14918, 'num_documents': 280652, 'min_document_length': 38, 'average_document_length': 29.33, 'max_document_length': 8326, 'unique_documents': 280652, 'min_query_length': 2, 'average_query_length': 37.63, 'max_query_length': 2, 'unique_queries': 14918, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14918}, 'javascript': {'number_of_characters': 1590642, 'num_samples': 68492, 'num_queries': 3291, 'num_documents': 65201, 'min_document_length': 40, 'average_document_length': 22.4, 'max_document_length': 214210, 'unique_documents': 65201, 'min_query_length': 2, 'average_query_length': 39.62, 'max_query_length': 2, 'unique_queries': 3291, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3291}, 'go': {'number_of_characters': 2264134, 'num_samples': 190857, 'num_queries': 8122, 'num_documents': 182735, 'min_document_length': 23, 'average_document_length': 10.39, 'max_document_length': 3589, 'unique_documents': 182735, 'min_query_length': 2, 'average_query_length': 45.0, 'max_query_length': 2, 'unique_queries': 8122, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8122}, 'ruby': {'number_of_characters': 391703, 'num_samples': 28849, 'num_queries': 1261, 'num_documents': 27588, 'min_document_length': 36, 'average_document_length': 12.2, 'max_document_length': 2244, 'unique_documents': 27588, 'min_query_length': 2, 'average_query_length': 43.76, 'max_query_length': 2, 'unique_queries': 1261, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1261}, 'java': {'number_of_characters': 4114584, 'num_samples': 192016, 'num_queries': 10955, 'num_documents': 181061, 'min_document_length': 38, 'average_document_length': 20.72, 'max_document_length': 5066, 'unique_documents': 181061, 'min_query_length': 2, 'average_query_length': 33.06, 'max_query_length': 2, 'unique_queries': 10955, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10955}, 'php': {'number_of_characters': 5253894, 'num_samples': 282251, 'num_queries': 14014, 'num_documents': 268237, 'min_document_length': 40, 'average_document_length': 17.59, 'max_document_length': 2995, 'unique_documents': 268237, 'min_query_length': 2, 'average_query_length': 38.28, 'max_query_length': 2, 'unique_queries': 14014, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 14014}}}}
CodeSearchNetRetrieval (Husain et al., 2019) ['go', 'java', 'javascript', 'php', 'python', 'ruby'] Retrieval p2p [Programming, Written] {'test': 12000} {'test': {'number_of_characters': 1950074, 'num_samples': 12000, 'num_queries': 6000, 'num_documents': 6000, 'min_document_length': 2, 'average_document_length': 324.01, 'max_document_length': 17533, 'unique_documents': 6000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 6000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 6000, 'hf_subset_descriptive_stats': {'python': {'number_of_characters': 467546, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 8, 'average_document_length': 466.55, 'max_document_length': 8636, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'javascript': {'number_of_characters': 187018, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 2, 'average_document_length': 186.02, 'max_document_length': 7657, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'go': {'number_of_characters': 126213, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 14, 'average_document_length': 125.21, 'max_document_length': 1501, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'ruby': {'number_of_characters': 314818, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 5, 'average_document_length': 313.82, 'max_document_length': 17533, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'java': {'number_of_characters': 691360, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 2, 'average_document_length': 690.36, 'max_document_length': 6473, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}, 'php': {'number_of_characters': 163119, 'num_samples': 2000, 'num_queries': 1000, 'num_documents': 1000, 'min_document_length': 5, 'average_document_length': 162.12, 'max_document_length': 1240, 'unique_documents': 1000, 'min_query_length': 1, 'average_query_length': 1.0, 'max_query_length': 1, 'unique_queries': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}}}
CodeTransOceanContest (Weixiang Yan, 2023) ['c++', 'python'] Retrieval p2p [Programming, Written] {'test': 1229} {'test': {'number_of_characters': 1744286, 'num_samples': 1229, 'num_queries': 221, 'num_documents': 1008, 'min_document_length': 8, 'average_document_length': 221.9, 'max_document_length': 4147, 'unique_documents': 1008, 'min_query_length': 8, 'average_query_length': 6880.58, 'max_query_length': 10852, 'unique_queries': 221, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 221}}
CodeTransOceanDL (Weixiang Yan, 2023) ['python'] Retrieval p2p [Programming, Written] {'test': 996} {'test': {'number_of_characters': 1543912, 'num_samples': 996, 'num_queries': 180, 'num_documents': 816, 'min_document_length': 376, 'average_document_length': 411.98, 'max_document_length': 8285, 'unique_documents': 816, 'min_query_length': 58, 'average_query_length': 6709.67, 'max_query_length': 8469, 'unique_queries': 180, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 180}}
ContractNLIConfidentialityOfAgreementLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLIExplicitIdentificationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLIInclusionOfVerballyConveyedInformationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLILimitedUseLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLINoLicensingLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLINoticeOnCompelledDisclosureLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLIPermissibleAcquirementOfSimilarInformationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLIPermissibleCopyLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLIPermissibleDevelopmentOfSimilarInformationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLIPermissiblePostAgreementPossessionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLIReturnOfConfidentialInformationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLISharingWithEmployeesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLISharingWithThirdPartiesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ContractNLISurvivalOfObligationsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
Core17InstructionRetrieval (Orion Weller, 2024) ['eng'] InstructionRetrieval s2p [News, Written] {'test': 19919} {'test': {'num_samples': 19919, 'num_docs': 19899, 'num_queries': 20, 'number_of_characters': 44450333, 'min_document_length': 7, 'average_document_length': 2233.03, 'max_document_length': 2959, 'unique_docs': 19143, 'min_query_length': 55, 'average_query_length': 109.75, 'max_query_length': 278, 'unique_queries': 20, 'min_instruction_length': 102, 'average_instruction_length': 295.55, 'max_instruction_length': 811, 'unique_instructions': 20, 'min_changed_instruction_length': 151, 'average_changed_instruction_length': 355.2, 'max_changed_instruction_length': 837, 'unique_changed_instructions': 20, 'min_average_relevant_docs_per_query': 4, 'average_relevant_docs_per_query': 32.7, 'max_average_relevant_docs_per_query': 55, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}
CorporateLobbyingLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
CosQA (Junjie Huang, 2021) ['eng', 'python'] Retrieval p2p [Programming, Written] {'test': 21104} {'test': {'number_of_characters': 5728450, 'num_samples': 21104, 'num_queries': 500, 'num_documents': 20604, 'min_document_length': 18, 'average_document_length': 0.89, 'max_document_length': 83, 'unique_documents': 20604, 'min_query_length': 88, 'average_query_length': 11420.09, 'max_query_length': 6396, 'unique_queries': 500, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 500}}
Country211 (Radford et al., 2021) ['eng'] ImageClassification i2i [Scene] {'test': 21100} {'test': {'num_samples': 21100, 'unique_num_labels': 211, 'min_image_width': 32, 'average_image_width': 468.59, 'max_image_width': 500, 'min_image_height': 37, 'average_image_height': 381.73, 'max_image_height': 500, 'labels': {'0': {'count': 100}, '1': {'count': 100}, '2': {'count': 100}, '3': {'count': 100}, '4': {'count': 100}, '5': {'count': 100}, '6': {'count': 100}, '7': {'count': 100}, '8': {'count': 100}, '9': {'count': 100}, '10': {'count': 100}, '11': {'count': 100}, '12': {'count': 100}, '13': {'count': 100}, '14': {'count': 100}, '15': {'count': 100}, '16': {'count': 100}, '17': {'count': 100}, '18': {'count': 100}, '19': {'count': 100}, '20': {'count': 100}, '21': {'count': 100}, '22': {'count': 100}, '23': {'count': 100}, '24': {'count': 100}, '25': {'count': 100}, '26': {'count': 100}, '27': {'count': 100}, '28': {'count': 100}, '29': {'count': 100}, '30': {'count': 100}, '31': {'count': 100}, '32': {'count': 100}, '33': {'count': 100}, '34': {'count': 100}, '35': {'count': 100}, '36': {'count': 100}, '37': {'count': 100}, '38': {'count': 100}, '39': {'count': 100}, '40': {'count': 100}, '41': {'count': 100}, '42': {'count': 100}, '43': {'count': 100}, '44': {'count': 100}, '45': {'count': 100}, '46': {'count': 100}, '47': {'count': 100}, '48': {'count': 100}, '49': {'count': 100}, '50': {'count': 100}, '51': {'count': 100}, '52': {'count': 100}, '53': {'count': 100}, '54': {'count': 100}, '55': {'count': 100}, '56': {'count': 100}, '57': {'count': 100}, '58': {'count': 100}, '59': {'count': 100}, '60': {'count': 100}, '61': {'count': 100}, '62': {'count': 100}, '63': {'count': 100}, '64': {'count': 100}, '65': {'count': 100}, '66': {'count': 100}, '67': {'count': 100}, '68': {'count': 100}, '69': {'count': 100}, '70': {'count': 100}, '71': {'count': 100}, '72': {'count': 100}, '73': {'count': 100}, '74': {'count': 100}, '75': {'count': 100}, '76': {'count': 100}, '77': {'count': 100}, '78': {'count': 100}, '79': {'count': 100}, '80': {'count': 100}, '81': {'count': 100}, '82': {'count': 100}, '83': {'count': 100}, '84': {'count': 100}, '85': {'count': 100}, '86': {'count': 100}, '87': {'count': 100}, '88': {'count': 100}, '89': {'count': 100}, '90': {'count': 100}, '91': {'count': 100}, '92': {'count': 100}, '93': {'count': 100}, '94': {'count': 100}, '95': {'count': 100}, '96': {'count': 100}, '97': {'count': 100}, '98': {'count': 100}, '99': {'count': 100}, '100': {'count': 100}, '101': {'count': 100}, '102': {'count': 100}, '103': {'count': 100}, '104': {'count': 100}, '105': {'count': 100}, '106': {'count': 100}, '107': {'count': 100}, '108': {'count': 100}, '109': {'count': 100}, '110': {'count': 100}, '111': {'count': 100}, '112': {'count': 100}, '113': {'count': 100}, '114': {'count': 100}, '115': {'count': 100}, '116': {'count': 100}, '117': {'count': 100}, '118': {'count': 100}, '119': {'count': 100}, '120': {'count': 100}, '121': {'count': 100}, '122': {'count': 100}, '123': {'count': 100}, '124': {'count': 100}, '125': {'count': 100}, '126': {'count': 100}, '127': {'count': 100}, '128': {'count': 100}, '129': {'count': 100}, '130': {'count': 100}, '131': {'count': 100}, '132': {'count': 100}, '133': {'count': 100}, '134': {'count': 100}, '135': {'count': 100}, '136': {'count': 100}, '137': {'count': 100}, '138': {'count': 100}, '139': {'count': 100}, '140': {'count': 100}, '141': {'count': 100}, '142': {'count': 100}, '143': {'count': 100}, '144': {'count': 100}, '145': {'count': 100}, '146': {'count': 100}, '147': {'count': 100}, '148': {'count': 100}, '149': {'count': 100}, '150': {'count': 100}, '151': {'count': 100}, '152': {'count': 100}, '153': {'count': 100}, '154': {'count': 100}, '155': {'count': 100}, '156': {'count': 100}, '157': {'count': 100}, '158': {'count': 100}, '159': {'count': 100}, '160': {'count': 100}, '161': {'count': 100}, '162': {'count': 100}, '163': {'count': 100}, '164': {'count': 100}, '165': {'count': 100}, '166': {'count': 100}, '167': {'count': 100}, '168': {'count': 100}, '169': {'count': 100}, '170': {'count': 100}, '171': {'count': 100}, '172': {'count': 100}, '173': {'count': 100}, '174': {'count': 100}, '175': {'count': 100}, '176': {'count': 100}, '177': {'count': 100}, '178': {'count': 100}, '179': {'count': 100}, '180': {'count': 100}, '181': {'count': 100}, '182': {'count': 100}, '183': {'count': 100}, '184': {'count': 100}, '185': {'count': 100}, '186': {'count': 100}, '187': {'count': 100}, '188': {'count': 100}, '189': {'count': 100}, '190': {'count': 100}, '191': {'count': 100}, '192': {'count': 100}, '193': {'count': 100}, '194': {'count': 100}, '195': {'count': 100}, '196': {'count': 100}, '197': {'count': 100}, '198': {'count': 100}, '199': {'count': 100}, '200': {'count': 100}, '201': {'count': 100}, '202': {'count': 100}, '203': {'count': 100}, '204': {'count': 100}, '205': {'count': 100}, '206': {'count': 100}, '207': {'count': 100}, '208': {'count': 100}, '209': {'count': 100}, '210': {'count': 100}}}}
Country211ZeroShot (Radford et al., 2021) ['eng'] ZeroShotClassification i2t [Scene] {'test': 21100} {'test': {'num_samples': 21100, 'unique_num_labels': 211, 'min_image_width': 32, 'average_image_width': 468.59, 'max_image_width': 500, 'min_image_height': 37, 'average_image_height': 381.73, 'max_image_height': 500, 'min_label_text_length': 37, 'average_label_text_length': 41.95, 'max_label_text_length': 69, 'labels': {'0': {'count': 100}, '1': {'count': 100}, '2': {'count': 100}, '3': {'count': 100}, '4': {'count': 100}, '5': {'count': 100}, '6': {'count': 100}, '7': {'count': 100}, '8': {'count': 100}, '9': {'count': 100}, '10': {'count': 100}, '11': {'count': 100}, '12': {'count': 100}, '13': {'count': 100}, '14': {'count': 100}, '15': {'count': 100}, '16': {'count': 100}, '17': {'count': 100}, '18': {'count': 100}, '19': {'count': 100}, '20': {'count': 100}, '21': {'count': 100}, '22': {'count': 100}, '23': {'count': 100}, '24': {'count': 100}, '25': {'count': 100}, '26': {'count': 100}, '27': {'count': 100}, '28': {'count': 100}, '29': {'count': 100}, '30': {'count': 100}, '31': {'count': 100}, '32': {'count': 100}, '33': {'count': 100}, '34': {'count': 100}, '35': {'count': 100}, '36': {'count': 100}, '37': {'count': 100}, '38': {'count': 100}, '39': {'count': 100}, '40': {'count': 100}, '41': {'count': 100}, '42': {'count': 100}, '43': {'count': 100}, '44': {'count': 100}, '45': {'count': 100}, '46': {'count': 100}, '47': {'count': 100}, '48': {'count': 100}, '49': {'count': 100}, '50': {'count': 100}, '51': {'count': 100}, '52': {'count': 100}, '53': {'count': 100}, '54': {'count': 100}, '55': {'count': 100}, '56': {'count': 100}, '57': {'count': 100}, '58': {'count': 100}, '59': {'count': 100}, '60': {'count': 100}, '61': {'count': 100}, '62': {'count': 100}, '63': {'count': 100}, '64': {'count': 100}, '65': {'count': 100}, '66': {'count': 100}, '67': {'count': 100}, '68': {'count': 100}, '69': {'count': 100}, '70': {'count': 100}, '71': {'count': 100}, '72': {'count': 100}, '73': {'count': 100}, '74': {'count': 100}, '75': {'count': 100}, '76': {'count': 100}, '77': {'count': 100}, '78': {'count': 100}, '79': {'count': 100}, '80': {'count': 100}, '81': {'count': 100}, '82': {'count': 100}, '83': {'count': 100}, '84': {'count': 100}, '85': {'count': 100}, '86': {'count': 100}, '87': {'count': 100}, '88': {'count': 100}, '89': {'count': 100}, '90': {'count': 100}, '91': {'count': 100}, '92': {'count': 100}, '93': {'count': 100}, '94': {'count': 100}, '95': {'count': 100}, '96': {'count': 100}, '97': {'count': 100}, '98': {'count': 100}, '99': {'count': 100}, '100': {'count': 100}, '101': {'count': 100}, '102': {'count': 100}, '103': {'count': 100}, '104': {'count': 100}, '105': {'count': 100}, '106': {'count': 100}, '107': {'count': 100}, '108': {'count': 100}, '109': {'count': 100}, '110': {'count': 100}, '111': {'count': 100}, '112': {'count': 100}, '113': {'count': 100}, '114': {'count': 100}, '115': {'count': 100}, '116': {'count': 100}, '117': {'count': 100}, '118': {'count': 100}, '119': {'count': 100}, '120': {'count': 100}, '121': {'count': 100}, '122': {'count': 100}, '123': {'count': 100}, '124': {'count': 100}, '125': {'count': 100}, '126': {'count': 100}, '127': {'count': 100}, '128': {'count': 100}, '129': {'count': 100}, '130': {'count': 100}, '131': {'count': 100}, '132': {'count': 100}, '133': {'count': 100}, '134': {'count': 100}, '135': {'count': 100}, '136': {'count': 100}, '137': {'count': 100}, '138': {'count': 100}, '139': {'count': 100}, '140': {'count': 100}, '141': {'count': 100}, '142': {'count': 100}, '143': {'count': 100}, '144': {'count': 100}, '145': {'count': 100}, '146': {'count': 100}, '147': {'count': 100}, '148': {'count': 100}, '149': {'count': 100}, '150': {'count': 100}, '151': {'count': 100}, '152': {'count': 100}, '153': {'count': 100}, '154': {'count': 100}, '155': {'count': 100}, '156': {'count': 100}, '157': {'count': 100}, '158': {'count': 100}, '159': {'count': 100}, '160': {'count': 100}, '161': {'count': 100}, '162': {'count': 100}, '163': {'count': 100}, '164': {'count': 100}, '165': {'count': 100}, '166': {'count': 100}, '167': {'count': 100}, '168': {'count': 100}, '169': {'count': 100}, '170': {'count': 100}, '171': {'count': 100}, '172': {'count': 100}, '173': {'count': 100}, '174': {'count': 100}, '175': {'count': 100}, '176': {'count': 100}, '177': {'count': 100}, '178': {'count': 100}, '179': {'count': 100}, '180': {'count': 100}, '181': {'count': 100}, '182': {'count': 100}, '183': {'count': 100}, '184': {'count': 100}, '185': {'count': 100}, '186': {'count': 100}, '187': {'count': 100}, '188': {'count': 100}, '189': {'count': 100}, '190': {'count': 100}, '191': {'count': 100}, '192': {'count': 100}, '193': {'count': 100}, '194': {'count': 100}, '195': {'count': 100}, '196': {'count': 100}, '197': {'count': 100}, '198': {'count': 100}, '199': {'count': 100}, '200': {'count': 100}, '201': {'count': 100}, '202': {'count': 100}, '203': {'count': 100}, '204': {'count': 100}, '205': {'count': 100}, '206': {'count': 100}, '207': {'count': 100}, '208': {'count': 100}, '209': {'count': 100}, '210': {'count': 100}}}}
CovidRetrieval ['cmn'] Retrieval s2p None None
CrossLingualSemanticDiscriminationWMT19 ['deu', 'fra'] Retrieval s2s [News, Written] None None
CrossLingualSemanticDiscriminationWMT21 ['deu', 'fra'] Retrieval s2s [News, Written] None None
CyrillicTurkicLangClassification (Goldhahn et al., 2012) ['bak', 'chv', 'kaz', 'kir', 'krc', 'rus', 'sah', 'tat', 'tyv'] Classification s2s [Web, Written] None None
CzechProductReviewSentimentClassification ['ces'] Classification s2s [Reviews, Written] None None
CzechSoMeSentimentClassification ['ces'] Classification s2s [Reviews, Written] None None
CzechSubjectivityClassification ['ces'] Classification s2s [Reviews, Written] None None
DBPedia (Hasibi et al., 2017) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
DBPedia-Fa ['fas'] Retrieval s2p [Encyclopaedic] None None
DBPedia-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Encyclopaedic, Written] None None
DBPedia-PL (Hasibi et al., 2017) ['pol'] Retrieval s2p [Encyclopaedic, Written] None None
DBPedia-PLHardNegatives (Hasibi et al., 2017) ['pol'] Retrieval s2p [Encyclopaedic, Written] None None
DBPediaHardNegatives (Hasibi et al., 2017) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
DBpediaClassification (Zhang et al., 2015) ['eng'] Classification s2s [Encyclopaedic, Written] None None
DKHateClassification ['dan'] Classification s2s [Social, Written] None None
DTD ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 1880} {'test': {'num_samples': 1880, 'unique_num_labels': 47, 'min_image_width': 300, 'average_image_width': 488.98, 'max_image_width': 900, 'min_image_height': 300, 'average_image_height': 447.5, 'max_image_height': 778, 'labels': {'0': {'count': 40}, '1': {'count': 40}, '10': {'count': 40}, '11': {'count': 40}, '12': {'count': 40}, '13': {'count': 40}, '14': {'count': 40}, '15': {'count': 40}, '16': {'count': 40}, '17': {'count': 40}, '18': {'count': 40}, '19': {'count': 40}, '2': {'count': 40}, '20': {'count': 40}, '21': {'count': 40}, '22': {'count': 40}, '23': {'count': 40}, '24': {'count': 40}, '25': {'count': 40}, '26': {'count': 40}, '27': {'count': 40}, '28': {'count': 40}, '29': {'count': 40}, '3': {'count': 40}, '30': {'count': 40}, '31': {'count': 40}, '32': {'count': 40}, '33': {'count': 40}, '34': {'count': 40}, '35': {'count': 40}, '36': {'count': 40}, '37': {'count': 40}, '38': {'count': 40}, '39': {'count': 40}, '4': {'count': 40}, '40': {'count': 40}, '41': {'count': 40}, '42': {'count': 40}, '43': {'count': 40}, '44': {'count': 40}, '45': {'count': 40}, '46': {'count': 40}, '5': {'count': 40}, '6': {'count': 40}, '7': {'count': 40}, '8': {'count': 40}, '9': {'count': 40}}}}
DTDZeroShot ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 1880} {'test': {'num_samples': 1880, 'unique_num_labels': 47, 'min_image_width': 300, 'average_image_width': 488.98, 'max_image_width': 900, 'min_image_height': 300, 'average_image_height': 447.5, 'max_image_height': 778, 'min_label_text_length': 24, 'average_label_text_length': 27.38, 'max_label_text_length': 32, 'labels': {'0': {'count': 40}, '1': {'count': 40}, '10': {'count': 40}, '11': {'count': 40}, '12': {'count': 40}, '13': {'count': 40}, '14': {'count': 40}, '15': {'count': 40}, '16': {'count': 40}, '17': {'count': 40}, '18': {'count': 40}, '19': {'count': 40}, '2': {'count': 40}, '20': {'count': 40}, '21': {'count': 40}, '22': {'count': 40}, '23': {'count': 40}, '24': {'count': 40}, '25': {'count': 40}, '26': {'count': 40}, '27': {'count': 40}, '28': {'count': 40}, '29': {'count': 40}, '3': {'count': 40}, '30': {'count': 40}, '31': {'count': 40}, '32': {'count': 40}, '33': {'count': 40}, '34': {'count': 40}, '35': {'count': 40}, '36': {'count': 40}, '37': {'count': 40}, '38': {'count': 40}, '39': {'count': 40}, '4': {'count': 40}, '40': {'count': 40}, '41': {'count': 40}, '42': {'count': 40}, '43': {'count': 40}, '44': {'count': 40}, '45': {'count': 40}, '46': {'count': 40}, '5': {'count': 40}, '6': {'count': 40}, '7': {'count': 40}, '8': {'count': 40}, '9': {'count': 40}}}}
DalajClassification ['swe'] Classification s2s [Non-fiction, Written] None None
DanFeverRetrieval ['dan'] Retrieval p2p [Encyclopaedic, Non-fiction, Spoken] None None
DanishPoliticalCommentsClassification (Mads Guldborg Kjeldgaard Kongsbak, 2019) ['dan'] Classification s2s [Social, Written] None None
DeepSentiPers ['fas'] Classification s2s [Reviews] None None
DefinitionClassificationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
DiaBlaBitextMining (González et al., 2019) ['eng', 'fra'] BitextMining s2s [Social, Written] None None
DigikalamagClassification ['fas'] Classification p2p [Web] None None
DigikalamagClustering ['fas'] Clustering p2p [Web] None None
Diversity1LegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
Diversity2LegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
Diversity3LegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
Diversity4LegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
Diversity5LegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
Diversity6LegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
DuRetrieval (Yifu Qiu, 2022) ['cmn'] Retrieval s2p None None
DutchBookReviewSentimentClassification (Benjamin et al., 2019) ['nld'] Classification s2s [Reviews, Written] None None
EDIST2ITRetrieval (Liu et al., 2023) ['eng'] Any2AnyRetrieval t2it [News] {'test': 1050308} {'test': {'number_of_characters': 97112347, 'num_samples': 1050308, 'num_queries': 3241, 'num_documents': 1047067, 'min_document_length': 3, 'average_document_length': 92.37, 'max_document_length': 264, 'unique_documents': 553171, 'num_document_images': 1047067, 'min_query_length': 30, 'average_query_length': 120.33, 'max_query_length': 340, 'unique_queries': 3241, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 2.57, 'max_relevant_docs_per_query': 8, 'unique_relevant_docs': 8324}}
ESCIReranking (Chandan K. Reddy, 2022) ['eng', 'jpn', 'spa'] Reranking s2p [Written] {'test': 29285} {'test': {'num_samples': 29285, 'number_of_characters': 254538331, 'num_positive': 271416, 'num_negative': 44235, 'min_query_length': 1, 'avg_query_length': 19.69, 'max_query_length': 151, 'unique_query': 29269, 'min_positive_length': 1, 'avg_positive_length': 803.92, 'max_positive_length': 8640, 'unique_positive': 217712, 'min_negative_length': 1, 'avg_negative_length': 808.5, 'max_negative_length': 4441, 'unique_negative': 39551, 'hf_subset_descriptive_stats': {'us': {'num_samples': 21296, 'number_of_characters': 186915609, 'num_positive': 189375, 'num_negative': 25463, 'min_query_length': 1, 'avg_query_length': 21.44, 'max_query_length': 151, 'unique_query': 21296, 'min_positive_length': 1, 'avg_positive_length': 868.37, 'max_positive_length': 5545, 'unique_positive': 150734, 'min_negative_length': 1, 'avg_negative_length': 864.45, 'max_negative_length': 3779, 'unique_negative': 23073}, 'es': {'num_samples': 3703, 'number_of_characters': 48861389, 'num_positive': 39110, 'num_negative': 10183, 'min_query_length': 3, 'avg_query_length': 20.68, 'max_query_length': 59, 'unique_query': 3703, 'min_positive_length': 1, 'avg_positive_length': 980.96, 'max_positive_length': 8640, 'unique_positive': 32921, 'min_negative_length': 1, 'avg_negative_length': 1023.22, 'max_negative_length': 4441, 'unique_negative': 9285}, 'jp': {'num_samples': 4286, 'number_of_characters': 18761333, 'num_positive': 42931, 'num_negative': 8589, 'min_query_length': 1, 'avg_query_length': 10.15, 'max_query_length': 60, 'unique_query': 4286, 'min_positive_length': 1, 'avg_positive_length': 358.36, 'max_positive_length': 3488, 'unique_positive': 35165, 'min_negative_length': 1, 'avg_negative_length': 388.08, 'max_negative_length': 3940, 'unique_negative': 7289}}}}
EcomRetrieval ['cmn'] Retrieval s2p None None
EightTagsClustering.v2 ['pol'] Clustering s2s [Social, Written] None None
EmotionClassification ['eng'] Classification s2s [Social, Written] None None
EncyclopediaVQAIT2ITRetrieval (Mensink et al., 2023) ['eng'] Any2AnyRetrieval it2it [Encyclopaedic] {'test': 72056} {'test': {'number_of_characters': 88615743, 'num_samples': 72056, 'num_queries': 3743, 'num_documents': 68313, 'min_document_length': 24, 'average_document_length': 1294.37, 'max_document_length': 72928, 'unique_documents': 49186, 'num_document_images': 68313, 'min_query_length': 19, 'average_query_length': 51.7, 'max_query_length': 245, 'unique_queries': 2832, 'num_query_images': 3743, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.31, 'max_relevant_docs_per_query': 75, 'unique_relevant_docs': 2683}}
EstQA ['est'] Retrieval s2p [Encyclopaedic, Written] None None
EstonianValenceClassification ['est'] Classification s2s [News, Written] None None
EuroSAT (Helber et al., 2019) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 5400} {'test': {'num_samples': 5400, 'unique_num_labels': 10, 'min_image_width': 64, 'average_image_width': 64.0, 'max_image_width': 64, 'min_image_height': 64, 'average_image_height': 64.0, 'max_image_height': 64, 'labels': {'4': {'count': 501}, '3': {'count': 496}, '7': {'count': 554}, '2': {'count': 573}, '9': {'count': 609}, '0': {'count': 596}, '8': {'count': 529}, '1': {'count': 608}, '5': {'count': 396}, '6': {'count': 538}}}}
EuroSATZeroShot (Helber et al., 2019) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 5400} {'test': {'num_samples': 5400, 'unique_num_labels': 10, 'min_image_width': 64, 'average_image_width': 64.0, 'max_image_width': 64, 'min_image_height': 64, 'average_image_height': 64.0, 'max_image_height': 64, 'min_label_text_length': 36, 'average_label_text_length': 45.2, 'max_label_text_length': 53, 'labels': {'4': {'count': 501}, '3': {'count': 496}, '7': {'count': 554}, '2': {'count': 573}, '9': {'count': 609}, '0': {'count': 596}, '8': {'count': 529}, '1': {'count': 608}, '5': {'count': 396}, '6': {'count': 538}}}}
FER2013 (Ian J. Goodfellow, 2015) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 7178} {'test': {'num_samples': 7178, 'unique_num_labels': 7, 'min_image_width': 48, 'average_image_width': 48.0, 'max_image_width': 48, 'min_image_height': 48, 'average_image_height': 48.0, 'max_image_height': 48, 'labels': {'0': {'count': 958}, '1': {'count': 111}, '2': {'count': 1024}, '3': {'count': 1774}, '4': {'count': 1233}, '5': {'count': 1247}, '6': {'count': 831}}}}
FER2013ZeroShot (Ian J. Goodfellow, 2015) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 7178} {'test': {'num_samples': 7178, 'unique_num_labels': 7, 'min_image_width': 48, 'average_image_width': 48.0, 'max_image_width': 48, 'min_image_height': 48, 'average_image_height': 48.0, 'max_image_height': 48, 'min_label_text_length': 30, 'average_label_text_length': 32.57, 'max_label_text_length': 35, 'labels': {'0': {'count': 958}, '1': {'count': 111}, '2': {'count': 1024}, '3': {'count': 1774}, '4': {'count': 1233}, '5': {'count': 1247}, '6': {'count': 831}}}}
FEVER ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
FEVER-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Encyclopaedic, Written] None None
FEVERHardNegatives ['eng'] Retrieval s2p None None
FGVCAircraft (Subhransu Maji, 2013) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 3333} {'test': {'num_samples': 3333, 'unique_num_labels': 100, 'min_image_width': 800, 'average_image_width': 1098.58, 'max_image_width': 1600, 'min_image_height': 413, 'average_image_height': 747.0, 'max_image_height': 1197, 'labels': {'0': {'count': 33}, '1': {'count': 33}, '2': {'count': 34}, '3': {'count': 33}, '4': {'count': 33}, '5': {'count': 34}, '6': {'count': 33}, '7': {'count': 33}, '8': {'count': 34}, '9': {'count': 33}, '10': {'count': 33}, '11': {'count': 34}, '12': {'count': 33}, '13': {'count': 33}, '14': {'count': 34}, '15': {'count': 33}, '16': {'count': 33}, '17': {'count': 34}, '18': {'count': 33}, '19': {'count': 33}, '20': {'count': 34}, '21': {'count': 33}, '22': {'count': 33}, '23': {'count': 34}, '24': {'count': 33}, '25': {'count': 33}, '26': {'count': 34}, '27': {'count': 33}, '28': {'count': 33}, '29': {'count': 34}, '30': {'count': 33}, '31': {'count': 33}, '32': {'count': 34}, '33': {'count': 33}, '34': {'count': 33}, '35': {'count': 34}, '36': {'count': 33}, '37': {'count': 33}, '38': {'count': 34}, '39': {'count': 33}, '40': {'count': 33}, '41': {'count': 34}, '42': {'count': 33}, '43': {'count': 33}, '44': {'count': 34}, '45': {'count': 33}, '46': {'count': 33}, '47': {'count': 34}, '48': {'count': 33}, '49': {'count': 33}, '50': {'count': 34}, '51': {'count': 33}, '52': {'count': 33}, '53': {'count': 34}, '54': {'count': 33}, '55': {'count': 33}, '56': {'count': 34}, '57': {'count': 33}, '58': {'count': 33}, '59': {'count': 34}, '60': {'count': 33}, '61': {'count': 33}, '62': {'count': 34}, '63': {'count': 33}, '64': {'count': 33}, '65': {'count': 34}, '66': {'count': 33}, '67': {'count': 33}, '68': {'count': 34}, '69': {'count': 33}, '70': {'count': 33}, '71': {'count': 34}, '72': {'count': 33}, '73': {'count': 33}, '74': {'count': 34}, '75': {'count': 33}, '76': {'count': 33}, '77': {'count': 34}, '78': {'count': 33}, '79': {'count': 33}, '80': {'count': 34}, '81': {'count': 33}, '82': {'count': 33}, '83': {'count': 34}, '84': {'count': 33}, '85': {'count': 33}, '86': {'count': 34}, '87': {'count': 33}, '88': {'count': 33}, '89': {'count': 34}, '90': {'count': 33}, '91': {'count': 33}, '92': {'count': 34}, '93': {'count': 33}, '94': {'count': 33}, '95': {'count': 34}, '96': {'count': 33}, '97': {'count': 33}, '98': {'count': 34}, '99': {'count': 33}}}}
FGVCAircraftZeroShot (Subhransu Maji, 2013) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 3333} {'test': {'num_samples': 3333, 'unique_num_labels': 100, 'min_image_width': 800, 'average_image_width': 1098.58, 'max_image_width': 1600, 'min_image_height': 413, 'average_image_height': 747.0, 'max_image_height': 1197, 'min_label_text_length': 38, 'average_label_text_length': 41.46, 'max_label_text_length': 53, 'labels': {'0': {'count': 33}, '1': {'count': 33}, '2': {'count': 34}, '3': {'count': 33}, '4': {'count': 33}, '5': {'count': 34}, '6': {'count': 33}, '7': {'count': 33}, '8': {'count': 34}, '9': {'count': 33}, '10': {'count': 33}, '11': {'count': 34}, '12': {'count': 33}, '13': {'count': 33}, '14': {'count': 34}, '15': {'count': 33}, '16': {'count': 33}, '17': {'count': 34}, '18': {'count': 33}, '19': {'count': 33}, '20': {'count': 34}, '21': {'count': 33}, '22': {'count': 33}, '23': {'count': 34}, '24': {'count': 33}, '25': {'count': 33}, '26': {'count': 34}, '27': {'count': 33}, '28': {'count': 33}, '29': {'count': 34}, '30': {'count': 33}, '31': {'count': 33}, '32': {'count': 34}, '33': {'count': 33}, '34': {'count': 33}, '35': {'count': 34}, '36': {'count': 33}, '37': {'count': 33}, '38': {'count': 34}, '39': {'count': 33}, '40': {'count': 33}, '41': {'count': 34}, '42': {'count': 33}, '43': {'count': 33}, '44': {'count': 34}, '45': {'count': 33}, '46': {'count': 33}, '47': {'count': 34}, '48': {'count': 33}, '49': {'count': 33}, '50': {'count': 34}, '51': {'count': 33}, '52': {'count': 33}, '53': {'count': 34}, '54': {'count': 33}, '55': {'count': 33}, '56': {'count': 34}, '57': {'count': 33}, '58': {'count': 33}, '59': {'count': 34}, '60': {'count': 33}, '61': {'count': 33}, '62': {'count': 34}, '63': {'count': 33}, '64': {'count': 33}, '65': {'count': 34}, '66': {'count': 33}, '67': {'count': 33}, '68': {'count': 34}, '69': {'count': 33}, '70': {'count': 33}, '71': {'count': 34}, '72': {'count': 33}, '73': {'count': 33}, '74': {'count': 34}, '75': {'count': 33}, '76': {'count': 33}, '77': {'count': 34}, '78': {'count': 33}, '79': {'count': 33}, '80': {'count': 34}, '81': {'count': 33}, '82': {'count': 33}, '83': {'count': 34}, '84': {'count': 33}, '85': {'count': 33}, '86': {'count': 34}, '87': {'count': 33}, '88': {'count': 33}, '89': {'count': 34}, '90': {'count': 33}, '91': {'count': 33}, '92': {'count': 34}, '93': {'count': 33}, '94': {'count': 33}, '95': {'count': 34}, '96': {'count': 33}, '97': {'count': 33}, '98': {'count': 34}, '99': {'count': 33}}}}
FORBI2IRetrieval (Pengxiang Wu, 2023) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 67234} {'test': {'number_of_characters': 0, 'num_samples': 67234, 'num_queries': 13250, 'num_documents': 53984, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 53984, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 13250, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 4235}}
FQuADRetrieval ['fra'] Retrieval s2p [Encyclopaedic, Written] None None
FaithDial (Dziri et al., 2022) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
FalseFriendsGermanEnglish ['deu'] PairClassification s2s [Written] None None
FaroeseSTS ['fao'] STS s2s [News, Web, Written] None None
FarsTail (Amirkhani et al., 2023) ['fas'] PairClassification s2s [Academic, Written] None None
FarsiParaphraseDetection ['fas'] PairClassification s2s None None
Farsick ['fas'] STS s2s None None
Fashion200kI2TRetrieval (Han et al., 2017) ['eng'] Any2AnyRetrieval i2t [Encyclopaedic] {'test': 66596} {'test': {'number_of_characters': 2054137, 'num_samples': 66596, 'num_queries': 4889, 'num_documents': 61707, 'min_document_length': 11, 'average_document_length': 33.29, 'max_document_length': 96, 'unique_documents': 61707, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 4889, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3119}}
Fashion200kT2IRetrieval (Han et al., 2017) ['eng'] Any2AnyRetrieval t2i [Encyclopaedic] {'test': 203543} {'test': {'number_of_characters': 57390, 'num_samples': 203543, 'num_queries': 1719, 'num_documents': 201824, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 201824, 'min_query_length': 14, 'average_query_length': 33.39, 'max_query_length': 83, 'unique_queries': 1719, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 2.82, 'max_relevant_docs_per_query': 35, 'unique_relevant_docs': 4847}}
FashionIQIT2IRetrieval (Wu et al., 2021) ['eng'] Any2AnyRetrieval it2i [Encyclopaedic] {'test': 80384} {'test': {'number_of_characters': 361250, 'num_samples': 80384, 'num_queries': 6003, 'num_documents': 74381, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 74381, 'min_query_length': 18, 'average_query_length': 60.18, 'max_query_length': 138, 'unique_queries': 5973, 'num_query_images': 6003, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 4, 'unique_relevant_docs': 6003}}
FeedbackQARetrieval ['eng'] Retrieval s2p [Government, Medical, Web, Written] None None
FiQA-PL (Nandan Thakur, 2021) ['pol'] Retrieval s2p [Financial, Written] None None
FiQA2018 (Nandan Thakur, 2021) ['eng'] Retrieval s2p [Financial, Written] None None
FiQA2018-Fa ['fas'] Retrieval s2p [Web] None None
FiQA2018-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Non-fiction, Written] None None
FilipinoHateSpeechClassification (Neil Vicente Cabasag et al., 2019) ['fil'] Classification s2s [Social, Written] None None
FilipinoShopeeReviewsClassification ['fil'] Classification s2s [Social, Written] None None
FinParaSTS ['fin'] STS s2s [News, Subtitles, Written] None None
FinToxicityClassification ['fin'] Classification s2s [News, Written] None None
FinancialPhrasebankClassification (P. Malo, 2014) ['eng'] Classification s2s [Financial, News, Written] None None
Flickr30kI2TRetrieval (Peter Young, 2014) ['eng'] Any2AnyRetrieval i2t [Web, Written] {'test': 6000} {'test': {'number_of_characters': 319250, 'num_samples': 6000, 'num_queries': 1000, 'num_documents': 5000, 'min_document_length': 11, 'average_document_length': 63.85, 'max_document_length': 375, 'unique_documents': 4999, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 1000, 'min_relevant_docs_per_query': 5, 'average_relevant_docs_per_query': 5.0, 'max_relevant_docs_per_query': 5, 'unique_relevant_docs': 5000}}
Flickr30kT2IRetrieval (Peter Young, 2014) ['eng'] Any2AnyRetrieval t2i [Web, Written] {'test': 6000} {'test': {'number_of_characters': 319250, 'num_samples': 6000, 'num_queries': 5000, 'num_documents': 1000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 1000, 'min_query_length': 11, 'average_query_length': 63.85, 'max_query_length': 375, 'unique_queries': 4999, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}
FloresBitextMining (Goyal et al., 2022) ['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] BitextMining s2s [Encyclopaedic, Non-fiction, Written] None None
Food101Classification (Bossard et al., 2014) ['eng'] ImageClassification i2i [Web] {'validation': 25250} {'validation': {'num_samples': 25250, 'unique_num_labels': 101, 'min_image_width': 287, 'average_image_width': 495.82, 'max_image_width': 512, 'min_image_height': 213, 'average_image_height': 475.08, 'max_image_height': 512, 'labels': {'6': {'count': 250}, '79': {'count': 250}, '81': {'count': 250}, '53': {'count': 250}, '10': {'count': 250}, '20': {'count': 250}, '77': {'count': 250}, '48': {'count': 250}, '86': {'count': 250}, '84': {'count': 250}, '76': {'count': 250}, '34': {'count': 250}, '51': {'count': 250}, '21': {'count': 250}, '64': {'count': 250}, '0': {'count': 250}, '43': {'count': 250}, '44': {'count': 250}, '73': {'count': 250}, '57': {'count': 250}, '14': {'count': 250}, '5': {'count': 250}, '46': {'count': 250}, '55': {'count': 250}, '93': {'count': 250}, '98': {'count': 250}, '38': {'count': 250}, '11': {'count': 250}, '99': {'count': 250}, '72': {'count': 250}, '22': {'count': 250}, '59': {'count': 250}, '70': {'count': 250}, '16': {'count': 250}, '2': {'count': 250}, '58': {'count': 250}, '83': {'count': 250}, '96': {'count': 250}, '39': {'count': 250}, '49': {'count': 250}, '45': {'count': 250}, '88': {'count': 250}, '9': {'count': 250}, '26': {'count': 250}, '94': {'count': 250}, '4': {'count': 250}, '65': {'count': 250}, '32': {'count': 250}, '27': {'count': 250}, '36': {'count': 250}, '87': {'count': 250}, '69': {'count': 250}, '85': {'count': 250}, '25': {'count': 250}, '40': {'count': 250}, '19': {'count': 250}, '35': {'count': 250}, '56': {'count': 250}, '42': {'count': 250}, '60': {'count': 250}, '68': {'count': 250}, '100': {'count': 250}, '41': {'count': 250}, '92': {'count': 250}, '24': {'count': 250}, '3': {'count': 250}, '89': {'count': 250}, '75': {'count': 250}, '17': {'count': 250}, '97': {'count': 250}, '61': {'count': 250}, '33': {'count': 250}, '80': {'count': 250}, '30': {'count': 250}, '8': {'count': 250}, '74': {'count': 250}, '66': {'count': 250}, '31': {'count': 250}, '18': {'count': 250}, '67': {'count': 250}, '37': {'count': 250}, '13': {'count': 250}, '63': {'count': 250}, '28': {'count': 250}, '47': {'count': 250}, '52': {'count': 250}, '54': {'count': 250}, '1': {'count': 250}, '82': {'count': 250}, '91': {'count': 250}, '95': {'count': 250}, '7': {'count': 250}, '29': {'count': 250}, '78': {'count': 250}, '15': {'count': 250}, '23': {'count': 250}, '12': {'count': 250}, '62': {'count': 250}, '50': {'count': 250}, '71': {'count': 250}, '90': {'count': 250}}}}
Food101ZeroShot (Bossard et al., 2014) ['eng'] ZeroShotClassification i2t [Web] {'validation': 25250} {'validation': {'num_samples': 25250, 'unique_num_labels': 101, 'min_image_width': 287, 'average_image_width': 495.82, 'max_image_width': 512, 'min_image_height': 213, 'average_image_height': 475.08, 'max_image_height': 512, 'min_label_text_length': 31, 'average_label_text_length': 38.72, 'max_label_text_length': 51, 'labels': {'6': {'count': 250}, '79': {'count': 250}, '81': {'count': 250}, '53': {'count': 250}, '10': {'count': 250}, '20': {'count': 250}, '77': {'count': 250}, '48': {'count': 250}, '86': {'count': 250}, '84': {'count': 250}, '76': {'count': 250}, '34': {'count': 250}, '51': {'count': 250}, '21': {'count': 250}, '64': {'count': 250}, '0': {'count': 250}, '43': {'count': 250}, '44': {'count': 250}, '73': {'count': 250}, '57': {'count': 250}, '14': {'count': 250}, '5': {'count': 250}, '46': {'count': 250}, '55': {'count': 250}, '93': {'count': 250}, '98': {'count': 250}, '38': {'count': 250}, '11': {'count': 250}, '99': {'count': 250}, '72': {'count': 250}, '22': {'count': 250}, '59': {'count': 250}, '70': {'count': 250}, '16': {'count': 250}, '2': {'count': 250}, '58': {'count': 250}, '83': {'count': 250}, '96': {'count': 250}, '39': {'count': 250}, '49': {'count': 250}, '45': {'count': 250}, '88': {'count': 250}, '9': {'count': 250}, '26': {'count': 250}, '94': {'count': 250}, '4': {'count': 250}, '65': {'count': 250}, '32': {'count': 250}, '27': {'count': 250}, '36': {'count': 250}, '87': {'count': 250}, '69': {'count': 250}, '85': {'count': 250}, '25': {'count': 250}, '40': {'count': 250}, '19': {'count': 250}, '35': {'count': 250}, '56': {'count': 250}, '42': {'count': 250}, '60': {'count': 250}, '68': {'count': 250}, '100': {'count': 250}, '41': {'count': 250}, '92': {'count': 250}, '24': {'count': 250}, '3': {'count': 250}, '89': {'count': 250}, '75': {'count': 250}, '17': {'count': 250}, '97': {'count': 250}, '61': {'count': 250}, '33': {'count': 250}, '80': {'count': 250}, '30': {'count': 250}, '8': {'count': 250}, '74': {'count': 250}, '66': {'count': 250}, '31': {'count': 250}, '18': {'count': 250}, '67': {'count': 250}, '37': {'count': 250}, '13': {'count': 250}, '63': {'count': 250}, '28': {'count': 250}, '47': {'count': 250}, '52': {'count': 250}, '54': {'count': 250}, '1': {'count': 250}, '82': {'count': 250}, '91': {'count': 250}, '95': {'count': 250}, '7': {'count': 250}, '29': {'count': 250}, '78': {'count': 250}, '15': {'count': 250}, '23': {'count': 250}, '12': {'count': 250}, '62': {'count': 250}, '50': {'count': 250}, '71': {'count': 250}, '90': {'count': 250}}}}
FrenchBookReviews ['fra'] Classification s2s [Reviews, Written] None None
FrenkEnClassification (Nikola Ljubešić, 2019) ['eng'] Classification s2s [Social, Written] None None
FrenkHrClassification (Nikola Ljubešić, 2019) ['hrv'] Classification s2s [Social, Written] None None
FrenkSlClassification (Nikola Ljubešić, 2019) ['slv'] Classification s2s [Social, Written] None None
FunctionOfDecisionSectionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
GLDv2I2IRetrieval (Weyand et al., 2020) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 762886} {'test': {'number_of_characters': 0, 'num_samples': 762886, 'num_queries': 1129, 'num_documents': 761757, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 761757, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 1129, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 13.41, 'max_relevant_docs_per_query': 168, 'unique_relevant_docs': 3081}}
GLDv2I2TRetrieval (Weyand et al., 2020) ['eng'] Any2AnyRetrieval i2t [Encyclopaedic] {'test': 2378} {'test': {'number_of_characters': 14829, 'num_samples': 2378, 'num_queries': 1704, 'num_documents': 674, 'min_document_length': 4, 'average_document_length': 22.0, 'max_document_length': 70, 'unique_documents': 674, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 1704, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.14, 'max_relevant_docs_per_query': 3, 'unique_relevant_docs': 674}}
GPUSpeedTask ['eng'] Speed s2s [Fiction, Written] None None
GTSRB (Stallkamp et al., 2011) ['eng'] ImageClassification i2i [Scene] {'test': 12630} {'test': {'num_samples': 12630, 'unique_num_labels': 43, 'min_image_width': 25, 'average_image_width': 50.51, 'max_image_width': 266, 'min_image_height': 25, 'average_image_height': 50.36, 'max_image_height': 232, 'labels': {'16': {'count': 150}, '1': {'count': 720}, '38': {'count': 690}, '33': {'count': 210}, '11': {'count': 420}, '18': {'count': 390}, '12': {'count': 690}, '25': {'count': 480}, '35': {'count': 390}, '7': {'count': 450}, '23': {'count': 150}, '4': {'count': 660}, '9': {'count': 480}, '21': {'count': 90}, '20': {'count': 90}, '27': {'count': 60}, '3': {'count': 450}, '13': {'count': 720}, '10': {'count': 660}, '5': {'count': 630}, '17': {'count': 360}, '34': {'count': 120}, '2': {'count': 750}, '8': {'count': 450}, '30': {'count': 150}, '24': {'count': 90}, '15': {'count': 210}, '26': {'count': 180}, '28': {'count': 150}, '22': {'count': 120}, '14': {'count': 270}, '32': {'count': 60}, '29': {'count': 90}, '6': {'count': 150}, '36': {'count': 120}, '40': {'count': 90}, '41': {'count': 60}, '31': {'count': 270}, '19': {'count': 60}, '0': {'count': 60}, '39': {'count': 90}, '42': {'count': 90}, '37': {'count': 60}}}}
GTSRBZeroShot (Stallkamp et al., 2011) ['eng'] ZeroShotClassification i2t [Scene] {'test': 12630} {'test': {'num_samples': 12630, 'unique_num_labels': 43, 'min_image_width': 25, 'average_image_width': 50.51, 'max_image_width': 266, 'min_image_height': 25, 'average_image_height': 50.36, 'max_image_height': 232, 'min_label_text_length': 43, 'average_label_text_length': 88.23, 'max_label_text_length': 116, 'labels': {'16': {'count': 150}, '1': {'count': 720}, '38': {'count': 690}, '33': {'count': 210}, '11': {'count': 420}, '18': {'count': 390}, '12': {'count': 690}, '25': {'count': 480}, '35': {'count': 390}, '7': {'count': 450}, '23': {'count': 150}, '4': {'count': 660}, '9': {'count': 480}, '21': {'count': 90}, '20': {'count': 90}, '27': {'count': 60}, '3': {'count': 450}, '13': {'count': 720}, '10': {'count': 660}, '5': {'count': 630}, '17': {'count': 360}, '34': {'count': 120}, '2': {'count': 750}, '8': {'count': 450}, '30': {'count': 150}, '24': {'count': 90}, '15': {'count': 210}, '26': {'count': 180}, '28': {'count': 150}, '22': {'count': 120}, '14': {'count': 270}, '32': {'count': 60}, '29': {'count': 90}, '6': {'count': 150}, '36': {'count': 120}, '40': {'count': 90}, '41': {'count': 60}, '31': {'count': 270}, '19': {'count': 60}, '0': {'count': 60}, '39': {'count': 90}, '42': {'count': 90}, '37': {'count': 60}}}}
GeoreviewClassification ['rus'] Classification p2p [Reviews, Written] None None
GeoreviewClusteringP2P ['rus'] Clustering p2p [Reviews, Written] None None
GeorgianFAQRetrieval ['kat'] Retrieval s2p [Web, Written] None None
GerDaLIR ['deu'] Retrieval s2p None None
GerDaLIRSmall ['deu'] Retrieval p2p [Legal, Written] None None
GermanDPR (Timo Möller, 2021) ['deu'] Retrieval s2p None None
GermanGovServiceRetrieval ['deu'] Retrieval s2p [Government, Written] None None
GermanPoliticiansTwitterSentimentClassification ['deu'] Classification s2s [Government, Social, Written] None None
GermanQuAD-Retrieval (Timo Möller, 2021) ['deu'] Retrieval s2p None None
GermanSTSBenchmark (Philip May, 2021) ['deu'] STS s2s None None
GreekCivicsQA ['ell'] Retrieval s2p [Academic, Written] None None
GreekLegalCodeClassification ['ell'] Classification s2s [Legal, Written] None None
GujaratiNewsClassification ['guj'] Classification s2s [News, Written] None None
HALClusteringS2S.v2 (Mathieu Ciancone, 2024) ['fra'] Clustering s2s [Academic, Written] None None
HagridRetrieval (Ehsan Kamalloo, 2023) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
HamshahriClustring ['fas'] Clustering p2p [News] None None
HateSpeechPortugueseClassification ['por'] Classification s2s [Social, Written] None None
HatefulMemesI2TRetrieval (Kiela et al., 2020) ['eng'] Any2AnyRetrieval i2t [Encyclopaedic] {'test': 11000} {'test': {'number_of_characters': 610257, 'num_samples': 11000, 'num_queries': 1000, 'num_documents': 10000, 'min_document_length': 3, 'average_document_length': 61.03, 'max_document_length': 433, 'unique_documents': 8045, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 1000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}
HatefulMemesT2IRetrieval (Kiela et al., 2020) ['eng'] Any2AnyRetrieval t2i [Encyclopaedic] {'test': 11000} {'test': {'number_of_characters': 55468, 'num_samples': 11000, 'num_queries': 1000, 'num_documents': 10000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 10000, 'min_query_length': 3, 'average_query_length': 55.47, 'max_query_length': 382, 'unique_queries': 829, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1000}}
HeadlineClassification ['rus'] Classification s2s [News, Written] None None
HebrewSentimentAnalysis ['heb'] Classification s2s [Reviews, Written] None None
HellaSwag (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
HinDialectClassification (Bafna et al., 2022) ['anp', 'awa', 'ben', 'bgc', 'bhb', 'bhd', 'bho', 'bjj', 'bns', 'bra', 'gbm', 'guj', 'hne', 'kfg', 'kfy', 'mag', 'mar', 'mup', 'noe', 'pan', 'raj'] Classification s2s [Social, Spoken, Written] None None
HindiDiscourseClassification ['hin'] Classification s2s [Fiction, Social, Written] None None
HotelReviewSentimentClassification (Elnagar et al., 2018) ['ara'] Classification s2s [Reviews, Written] None None
HotpotQA ['eng'] Retrieval s2p [Web, Written] None None
HotpotQA-Fa ['fas'] Retrieval s2p [Encyclopaedic] None None
HotpotQA-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Web, Written] None None
HotpotQA-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p [Web, Written] None None
HotpotQA-PLHardNegatives (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p [Web, Written] None None
HotpotQAHardNegatives ['eng'] Retrieval s2p [Web, Written] None None
HunSum2AbstractiveRetrieval (Botond Barta, 2024) ['hun'] Retrieval s2p [News, Written] None None
IFlyTek ['cmn'] Classification s2s None None
IN22ConvBitextMining (Jay Gala, 2023) ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] BitextMining s2s [Fiction, Social, Spoken, Spoken] {'test': 760518} {'test': {'num_samples': 760518, 'number_of_characters': 82637104, 'unique_pairs': 759283, 'min_sentence1_length': 3, 'average_sentence1_length': 54.33, 'max_sentence1_length': 239, 'unique_sentence1': 34430, 'min_sentence2_length': 3, 'average_sentence2_length': 54.33, 'max_sentence2_length': 239, 'unique_sentence2': 34430, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155988, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'asm_Beng-brx_Deva': {'num_samples': 1503, 'number_of_characters': 162044, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'asm_Beng-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167032, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'asm_Beng-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160716, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'asm_Beng-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156282, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'asm_Beng-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 158269, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'asm_Beng-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159964, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'asm_Beng-kan_Knda': {'num_samples': 1503, 'number_of_characters': 165177, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'asm_Beng-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164681, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'asm_Beng-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162408, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'asm_Beng-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172838, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'asm_Beng-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162747, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'asm_Beng-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157316, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'asm_Beng-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160906, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'asm_Beng-ory_Orya': {'num_samples': 1503, 'number_of_characters': 164223, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'asm_Beng-pan_Guru': {'num_samples': 1503, 'number_of_characters': 160201, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'asm_Beng-san_Deva': {'num_samples': 1503, 'number_of_characters': 158093, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'asm_Beng-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169379, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'asm_Beng-snd_Deva': {'num_samples': 1503, 'number_of_characters': 162623, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'asm_Beng-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174866, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'asm_Beng-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157690, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'asm_Beng-urd_Arab': {'num_samples': 1503, 'number_of_characters': 161305, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.75, 'max_sentence1_length': 208, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'ben_Beng-asm_Beng': {'num_samples': 1503, 'number_of_characters': 155988, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'ben_Beng-brx_Deva': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'ben_Beng-doi_Deva': {'num_samples': 1503, 'number_of_characters': 161436, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'ben_Beng-eng_Latn': {'num_samples': 1503, 'number_of_characters': 155120, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'ben_Beng-gom_Deva': {'num_samples': 1503, 'number_of_characters': 150686, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'ben_Beng-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 152673, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'ben_Beng-hin_Deva': {'num_samples': 1503, 'number_of_characters': 154368, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'ben_Beng-kan_Knda': {'num_samples': 1503, 'number_of_characters': 159581, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'ben_Beng-kas_Arab': {'num_samples': 1503, 'number_of_characters': 159085, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'ben_Beng-mai_Deva': {'num_samples': 1503, 'number_of_characters': 156812, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'ben_Beng-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 167242, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'ben_Beng-mar_Deva': {'num_samples': 1503, 'number_of_characters': 157151, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'ben_Beng-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 151720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'ben_Beng-npi_Deva': {'num_samples': 1503, 'number_of_characters': 155310, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'ben_Beng-ory_Orya': {'num_samples': 1503, 'number_of_characters': 158627, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'ben_Beng-pan_Guru': {'num_samples': 1503, 'number_of_characters': 154605, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'ben_Beng-san_Deva': {'num_samples': 1503, 'number_of_characters': 152497, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'ben_Beng-sat_Olck': {'num_samples': 1503, 'number_of_characters': 163783, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'ben_Beng-snd_Deva': {'num_samples': 1503, 'number_of_characters': 157027, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'ben_Beng-tam_Taml': {'num_samples': 1503, 'number_of_characters': 169270, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'ben_Beng-tel_Telu': {'num_samples': 1503, 'number_of_characters': 152094, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'ben_Beng-urd_Arab': {'num_samples': 1503, 'number_of_characters': 155709, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.03, 'max_sentence1_length': 178, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'brx_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162044, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'brx_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'brx_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167492, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'brx_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161176, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'brx_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156742, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'brx_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'brx_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 160424, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'brx_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 165637, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'brx_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165141, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'brx_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162868, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'brx_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'brx_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163207, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'brx_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157776, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'brx_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'brx_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 164683, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'brx_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 160661, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'brx_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 158553, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'brx_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169839, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'brx_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163083, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'brx_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175326, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'brx_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158150, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'brx_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 161765, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.06, 'max_sentence1_length': 210, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'doi_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 167032, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'doi_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 161436, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'doi_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 167492, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'doi_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 166164, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'doi_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'doi_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 163717, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'doi_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 165412, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'doi_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 170625, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'doi_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 170129, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'doi_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 167856, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'doi_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 178286, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'doi_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 168195, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'doi_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 162764, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'doi_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 166354, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'doi_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 169671, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'doi_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 165649, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'doi_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 163541, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'doi_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 174827, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'doi_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 168071, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'doi_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 180314, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'doi_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 163138, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'doi_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 166753, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 57.38, 'max_sentence1_length': 209, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'eng_Latn-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160716, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'eng_Latn-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155120, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'eng_Latn-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161176, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'eng_Latn-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166164, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'eng_Latn-gom_Deva': {'num_samples': 1503, 'number_of_characters': 155414, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'eng_Latn-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157401, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'eng_Latn-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159096, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'eng_Latn-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164309, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'eng_Latn-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163813, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'eng_Latn-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161540, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'eng_Latn-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171970, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'eng_Latn-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161879, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'eng_Latn-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'eng_Latn-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160038, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'eng_Latn-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163355, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'eng_Latn-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159333, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'eng_Latn-san_Deva': {'num_samples': 1503, 'number_of_characters': 157225, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'eng_Latn-sat_Olck': {'num_samples': 1503, 'number_of_characters': 168511, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'eng_Latn-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161755, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'eng_Latn-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173998, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'eng_Latn-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156822, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'eng_Latn-urd_Arab': {'num_samples': 1503, 'number_of_characters': 160437, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.18, 'max_sentence1_length': 201, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'gom_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 156282, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'gom_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 150686, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'gom_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 156742, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'gom_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'gom_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 155414, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'gom_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 152967, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'gom_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 154662, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'gom_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 159875, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'gom_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 159379, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'gom_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 157106, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'gom_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 167536, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'gom_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 157445, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'gom_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 152014, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'gom_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 155604, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'gom_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 158921, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'gom_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 154899, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'gom_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 152791, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'gom_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 164077, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'gom_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 157321, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'gom_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 169564, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'gom_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 152388, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'gom_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 156003, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 50.23, 'max_sentence1_length': 203, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'guj_Gujr-asm_Beng': {'num_samples': 1503, 'number_of_characters': 158269, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'guj_Gujr-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152673, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'guj_Gujr-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'guj_Gujr-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163717, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'guj_Gujr-eng_Latn': {'num_samples': 1503, 'number_of_characters': 157401, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'guj_Gujr-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152967, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'guj_Gujr-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156649, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'guj_Gujr-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161862, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'guj_Gujr-kas_Arab': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'guj_Gujr-mai_Deva': {'num_samples': 1503, 'number_of_characters': 159093, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'guj_Gujr-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 169523, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'guj_Gujr-mar_Deva': {'num_samples': 1503, 'number_of_characters': 159432, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'guj_Gujr-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 154001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'guj_Gujr-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157591, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'guj_Gujr-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160908, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'guj_Gujr-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156886, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'guj_Gujr-san_Deva': {'num_samples': 1503, 'number_of_characters': 154778, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'guj_Gujr-sat_Olck': {'num_samples': 1503, 'number_of_characters': 166064, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'guj_Gujr-snd_Deva': {'num_samples': 1503, 'number_of_characters': 159308, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'guj_Gujr-tam_Taml': {'num_samples': 1503, 'number_of_characters': 171551, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'guj_Gujr-tel_Telu': {'num_samples': 1503, 'number_of_characters': 154375, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'guj_Gujr-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157990, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 51.55, 'max_sentence1_length': 205, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'hin_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 159964, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'hin_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 154368, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'hin_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 160424, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'hin_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 165412, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'hin_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 159096, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'hin_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 154662, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'hin_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 156649, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'hin_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 163557, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'hin_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163061, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'hin_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 160788, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'hin_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171218, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'hin_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161127, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'hin_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 155696, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'hin_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 159286, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'hin_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 162603, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'hin_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 158581, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'hin_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 156473, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'hin_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 167759, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'hin_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'hin_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173246, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'hin_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156070, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'hin_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 159685, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.68, 'max_sentence1_length': 192, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'kan_Knda-asm_Beng': {'num_samples': 1503, 'number_of_characters': 165177, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'kan_Knda-ben_Beng': {'num_samples': 1503, 'number_of_characters': 159581, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'kan_Knda-brx_Deva': {'num_samples': 1503, 'number_of_characters': 165637, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'kan_Knda-doi_Deva': {'num_samples': 1503, 'number_of_characters': 170625, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'kan_Knda-eng_Latn': {'num_samples': 1503, 'number_of_characters': 164309, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'kan_Knda-gom_Deva': {'num_samples': 1503, 'number_of_characters': 159875, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'kan_Knda-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 161862, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'kan_Knda-hin_Deva': {'num_samples': 1503, 'number_of_characters': 163557, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'kan_Knda-kas_Arab': {'num_samples': 1503, 'number_of_characters': 168274, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'kan_Knda-mai_Deva': {'num_samples': 1503, 'number_of_characters': 166001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'kan_Knda-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 176431, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'kan_Knda-mar_Deva': {'num_samples': 1503, 'number_of_characters': 166340, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'kan_Knda-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 160909, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'kan_Knda-npi_Deva': {'num_samples': 1503, 'number_of_characters': 164499, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'kan_Knda-ory_Orya': {'num_samples': 1503, 'number_of_characters': 167816, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'kan_Knda-pan_Guru': {'num_samples': 1503, 'number_of_characters': 163794, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'kan_Knda-san_Deva': {'num_samples': 1503, 'number_of_characters': 161686, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'kan_Knda-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172972, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'kan_Knda-snd_Deva': {'num_samples': 1503, 'number_of_characters': 166216, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'kan_Knda-tam_Taml': {'num_samples': 1503, 'number_of_characters': 178459, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'kan_Knda-tel_Telu': {'num_samples': 1503, 'number_of_characters': 161283, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'kan_Knda-urd_Arab': {'num_samples': 1503, 'number_of_characters': 164898, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 56.14, 'max_sentence1_length': 201, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'kas_Arab-asm_Beng': {'num_samples': 1503, 'number_of_characters': 164681, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'kas_Arab-ben_Beng': {'num_samples': 1503, 'number_of_characters': 159085, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'kas_Arab-brx_Deva': {'num_samples': 1503, 'number_of_characters': 165141, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'kas_Arab-doi_Deva': {'num_samples': 1503, 'number_of_characters': 170129, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'kas_Arab-eng_Latn': {'num_samples': 1503, 'number_of_characters': 163813, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'kas_Arab-gom_Deva': {'num_samples': 1503, 'number_of_characters': 159379, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'kas_Arab-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'kas_Arab-hin_Deva': {'num_samples': 1503, 'number_of_characters': 163061, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'kas_Arab-kan_Knda': {'num_samples': 1503, 'number_of_characters': 168274, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'kas_Arab-mai_Deva': {'num_samples': 1503, 'number_of_characters': 165505, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'kas_Arab-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 175935, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'kas_Arab-mar_Deva': {'num_samples': 1503, 'number_of_characters': 165844, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'kas_Arab-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 160413, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'kas_Arab-npi_Deva': {'num_samples': 1503, 'number_of_characters': 164003, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'kas_Arab-ory_Orya': {'num_samples': 1503, 'number_of_characters': 167320, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'kas_Arab-pan_Guru': {'num_samples': 1503, 'number_of_characters': 163298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'kas_Arab-san_Deva': {'num_samples': 1503, 'number_of_characters': 161190, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'kas_Arab-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172476, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'kas_Arab-snd_Deva': {'num_samples': 1503, 'number_of_characters': 165720, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'kas_Arab-tam_Taml': {'num_samples': 1503, 'number_of_characters': 177963, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'kas_Arab-tel_Telu': {'num_samples': 1503, 'number_of_characters': 160787, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'kas_Arab-urd_Arab': {'num_samples': 1503, 'number_of_characters': 164402, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 55.81, 'max_sentence1_length': 203, 'unique_sentence1': 1502, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mai_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162408, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mai_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 156812, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mai_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 162868, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mai_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 167856, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mai_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161540, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mai_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157106, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mai_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159093, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mai_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 160788, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mai_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166001, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mai_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165505, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mai_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173662, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mai_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163571, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mai_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158140, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mai_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mai_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165047, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mai_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161025, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mai_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 158917, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mai_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170203, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mai_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163447, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mai_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175690, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mai_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158514, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mai_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162129, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 54.3, 'max_sentence1_length': 230, 'unique_sentence1': 1499, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mal_Mlym-asm_Beng': {'num_samples': 1503, 'number_of_characters': 172838, 'unique_pairs': 1498, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mal_Mlym-ben_Beng': {'num_samples': 1503, 'number_of_characters': 167242, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mal_Mlym-brx_Deva': {'num_samples': 1503, 'number_of_characters': 173298, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mal_Mlym-doi_Deva': {'num_samples': 1503, 'number_of_characters': 178286, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mal_Mlym-eng_Latn': {'num_samples': 1503, 'number_of_characters': 171970, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mal_Mlym-gom_Deva': {'num_samples': 1503, 'number_of_characters': 167536, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mal_Mlym-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 169523, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mal_Mlym-hin_Deva': {'num_samples': 1503, 'number_of_characters': 171218, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mal_Mlym-kan_Knda': {'num_samples': 1503, 'number_of_characters': 176431, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mal_Mlym-kas_Arab': {'num_samples': 1503, 'number_of_characters': 175935, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mal_Mlym-mai_Deva': {'num_samples': 1503, 'number_of_characters': 173662, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mal_Mlym-mar_Deva': {'num_samples': 1503, 'number_of_characters': 174001, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mal_Mlym-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 168570, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mal_Mlym-npi_Deva': {'num_samples': 1503, 'number_of_characters': 172160, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mal_Mlym-ory_Orya': {'num_samples': 1503, 'number_of_characters': 175477, 'unique_pairs': 1503, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mal_Mlym-pan_Guru': {'num_samples': 1503, 'number_of_characters': 171455, 'unique_pairs': 1498, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mal_Mlym-san_Deva': {'num_samples': 1503, 'number_of_characters': 169347, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mal_Mlym-sat_Olck': {'num_samples': 1503, 'number_of_characters': 180633, 'unique_pairs': 1501, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mal_Mlym-snd_Deva': {'num_samples': 1503, 'number_of_characters': 173877, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mal_Mlym-tam_Taml': {'num_samples': 1503, 'number_of_characters': 186120, 'unique_pairs': 1502, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mal_Mlym-tel_Telu': {'num_samples': 1503, 'number_of_characters': 168944, 'unique_pairs': 1500, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mal_Mlym-urd_Arab': {'num_samples': 1503, 'number_of_characters': 172559, 'unique_pairs': 1499, 'min_sentence1_length': 5, 'average_sentence1_length': 61.24, 'max_sentence1_length': 219, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mar_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162747, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mar_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 157151, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mar_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 163207, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mar_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 168195, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mar_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161879, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mar_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157445, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mar_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159432, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mar_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 161127, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mar_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166340, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mar_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165844, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mar_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 163571, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mar_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 174001, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mar_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158479, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'mar_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 162069, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mar_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165386, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mar_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161364, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mar_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 159256, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mar_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170542, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mar_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 163786, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mar_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 176029, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mar_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158853, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mar_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162468, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.53, 'max_sentence1_length': 221, 'unique_sentence1': 1501, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'mni_Mtei-asm_Beng': {'num_samples': 1503, 'number_of_characters': 157316, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'mni_Mtei-ben_Beng': {'num_samples': 1503, 'number_of_characters': 151720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'mni_Mtei-brx_Deva': {'num_samples': 1503, 'number_of_characters': 157776, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'mni_Mtei-doi_Deva': {'num_samples': 1503, 'number_of_characters': 162764, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'mni_Mtei-eng_Latn': {'num_samples': 1503, 'number_of_characters': 156448, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'mni_Mtei-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152014, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'mni_Mtei-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154001, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'mni_Mtei-hin_Deva': {'num_samples': 1503, 'number_of_characters': 155696, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'mni_Mtei-kan_Knda': {'num_samples': 1503, 'number_of_characters': 160909, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'mni_Mtei-kas_Arab': {'num_samples': 1503, 'number_of_characters': 160413, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'mni_Mtei-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158140, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'mni_Mtei-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 168570, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'mni_Mtei-mar_Deva': {'num_samples': 1503, 'number_of_characters': 158479, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'mni_Mtei-npi_Deva': {'num_samples': 1503, 'number_of_characters': 156638, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'mni_Mtei-ory_Orya': {'num_samples': 1503, 'number_of_characters': 159955, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'mni_Mtei-pan_Guru': {'num_samples': 1503, 'number_of_characters': 155933, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'mni_Mtei-san_Deva': {'num_samples': 1503, 'number_of_characters': 153825, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'mni_Mtei-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165111, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'mni_Mtei-snd_Deva': {'num_samples': 1503, 'number_of_characters': 158355, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'mni_Mtei-tam_Taml': {'num_samples': 1503, 'number_of_characters': 170598, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'mni_Mtei-tel_Telu': {'num_samples': 1503, 'number_of_characters': 153422, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'mni_Mtei-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157037, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 50.91, 'max_sentence1_length': 239, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'npi_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160906, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'npi_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155310, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'npi_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161366, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'npi_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166354, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'npi_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160038, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'npi_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 155604, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'npi_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157591, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'npi_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159286, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'npi_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164499, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'npi_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'npi_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161730, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'npi_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172160, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'npi_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162069, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'npi_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 156638, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'npi_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163545, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'npi_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159523, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'npi_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 157415, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'npi_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 168701, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'npi_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161945, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'npi_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174188, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'npi_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157012, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'npi_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 160627, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.3, 'max_sentence1_length': 223, 'unique_sentence1': 1497, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'ory_Orya-asm_Beng': {'num_samples': 1503, 'number_of_characters': 164223, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'ory_Orya-ben_Beng': {'num_samples': 1503, 'number_of_characters': 158627, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'ory_Orya-brx_Deva': {'num_samples': 1503, 'number_of_characters': 164683, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'ory_Orya-doi_Deva': {'num_samples': 1503, 'number_of_characters': 169671, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'ory_Orya-eng_Latn': {'num_samples': 1503, 'number_of_characters': 163355, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'ory_Orya-gom_Deva': {'num_samples': 1503, 'number_of_characters': 158921, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'ory_Orya-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 160908, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'ory_Orya-hin_Deva': {'num_samples': 1503, 'number_of_characters': 162603, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'ory_Orya-kan_Knda': {'num_samples': 1503, 'number_of_characters': 167816, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'ory_Orya-kas_Arab': {'num_samples': 1503, 'number_of_characters': 167320, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'ory_Orya-mai_Deva': {'num_samples': 1503, 'number_of_characters': 165047, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'ory_Orya-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 175477, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'ory_Orya-mar_Deva': {'num_samples': 1503, 'number_of_characters': 165386, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'ory_Orya-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 159955, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'ory_Orya-npi_Deva': {'num_samples': 1503, 'number_of_characters': 163545, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'ory_Orya-pan_Guru': {'num_samples': 1503, 'number_of_characters': 162840, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'ory_Orya-san_Deva': {'num_samples': 1503, 'number_of_characters': 160732, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'ory_Orya-sat_Olck': {'num_samples': 1503, 'number_of_characters': 172018, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'ory_Orya-snd_Deva': {'num_samples': 1503, 'number_of_characters': 165262, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'ory_Orya-tam_Taml': {'num_samples': 1503, 'number_of_characters': 177505, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'ory_Orya-tel_Telu': {'num_samples': 1503, 'number_of_characters': 160329, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'ory_Orya-urd_Arab': {'num_samples': 1503, 'number_of_characters': 163944, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 55.51, 'max_sentence1_length': 195, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'pan_Guru-asm_Beng': {'num_samples': 1503, 'number_of_characters': 160201, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'pan_Guru-ben_Beng': {'num_samples': 1503, 'number_of_characters': 154605, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'pan_Guru-brx_Deva': {'num_samples': 1503, 'number_of_characters': 160661, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'pan_Guru-doi_Deva': {'num_samples': 1503, 'number_of_characters': 165649, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'pan_Guru-eng_Latn': {'num_samples': 1503, 'number_of_characters': 159333, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'pan_Guru-gom_Deva': {'num_samples': 1503, 'number_of_characters': 154899, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'pan_Guru-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 156886, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'pan_Guru-hin_Deva': {'num_samples': 1503, 'number_of_characters': 158581, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'pan_Guru-kan_Knda': {'num_samples': 1503, 'number_of_characters': 163794, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'pan_Guru-kas_Arab': {'num_samples': 1503, 'number_of_characters': 163298, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'pan_Guru-mai_Deva': {'num_samples': 1503, 'number_of_characters': 161025, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'pan_Guru-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 171455, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'pan_Guru-mar_Deva': {'num_samples': 1503, 'number_of_characters': 161364, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'pan_Guru-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 155933, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'pan_Guru-npi_Deva': {'num_samples': 1503, 'number_of_characters': 159523, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'pan_Guru-ory_Orya': {'num_samples': 1503, 'number_of_characters': 162840, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'pan_Guru-san_Deva': {'num_samples': 1503, 'number_of_characters': 156710, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'pan_Guru-sat_Olck': {'num_samples': 1503, 'number_of_characters': 167996, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'pan_Guru-snd_Deva': {'num_samples': 1503, 'number_of_characters': 161240, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'pan_Guru-tam_Taml': {'num_samples': 1503, 'number_of_characters': 173483, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'pan_Guru-tel_Telu': {'num_samples': 1503, 'number_of_characters': 156307, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'pan_Guru-urd_Arab': {'num_samples': 1503, 'number_of_characters': 159922, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 52.83, 'max_sentence1_length': 221, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'san_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 158093, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'san_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152497, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'san_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158553, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'san_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163541, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'san_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 157225, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'san_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152791, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'san_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154778, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'san_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156473, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'san_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161686, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'san_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 161190, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'san_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158917, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'san_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 169347, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'san_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 159256, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'san_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 153825, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'san_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157415, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'san_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160732, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'san_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156710, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'san_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165888, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'san_Deva-snd_Deva': {'num_samples': 1503, 'number_of_characters': 159132, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'san_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 171375, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'san_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 154199, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'san_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157814, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 51.43, 'max_sentence1_length': 181, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'sat_Olck-asm_Beng': {'num_samples': 1503, 'number_of_characters': 169379, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'sat_Olck-ben_Beng': {'num_samples': 1503, 'number_of_characters': 163783, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'sat_Olck-brx_Deva': {'num_samples': 1503, 'number_of_characters': 169839, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'sat_Olck-doi_Deva': {'num_samples': 1503, 'number_of_characters': 174827, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'sat_Olck-eng_Latn': {'num_samples': 1503, 'number_of_characters': 168511, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'sat_Olck-gom_Deva': {'num_samples': 1503, 'number_of_characters': 164077, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'sat_Olck-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 166064, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'sat_Olck-hin_Deva': {'num_samples': 1503, 'number_of_characters': 167759, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'sat_Olck-kan_Knda': {'num_samples': 1503, 'number_of_characters': 172972, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'sat_Olck-kas_Arab': {'num_samples': 1503, 'number_of_characters': 172476, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'sat_Olck-mai_Deva': {'num_samples': 1503, 'number_of_characters': 170203, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'sat_Olck-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 180633, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'sat_Olck-mar_Deva': {'num_samples': 1503, 'number_of_characters': 170542, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'sat_Olck-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 165111, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'sat_Olck-npi_Deva': {'num_samples': 1503, 'number_of_characters': 168701, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'sat_Olck-ory_Orya': {'num_samples': 1503, 'number_of_characters': 172018, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'sat_Olck-pan_Guru': {'num_samples': 1503, 'number_of_characters': 167996, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'sat_Olck-san_Deva': {'num_samples': 1503, 'number_of_characters': 165888, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'sat_Olck-snd_Deva': {'num_samples': 1503, 'number_of_characters': 170418, 'unique_pairs': 1501, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'sat_Olck-tam_Taml': {'num_samples': 1503, 'number_of_characters': 182661, 'unique_pairs': 1503, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'sat_Olck-tel_Telu': {'num_samples': 1503, 'number_of_characters': 165485, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'sat_Olck-urd_Arab': {'num_samples': 1503, 'number_of_characters': 169100, 'unique_pairs': 1502, 'min_sentence1_length': 7, 'average_sentence1_length': 58.94, 'max_sentence1_length': 225, 'unique_sentence1': 1500, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'snd_Deva-asm_Beng': {'num_samples': 1503, 'number_of_characters': 162623, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'snd_Deva-ben_Beng': {'num_samples': 1503, 'number_of_characters': 157027, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'snd_Deva-brx_Deva': {'num_samples': 1503, 'number_of_characters': 163083, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'snd_Deva-doi_Deva': {'num_samples': 1503, 'number_of_characters': 168071, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'snd_Deva-eng_Latn': {'num_samples': 1503, 'number_of_characters': 161755, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'snd_Deva-gom_Deva': {'num_samples': 1503, 'number_of_characters': 157321, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'snd_Deva-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 159308, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'snd_Deva-hin_Deva': {'num_samples': 1503, 'number_of_characters': 161003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'snd_Deva-kan_Knda': {'num_samples': 1503, 'number_of_characters': 166216, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'snd_Deva-kas_Arab': {'num_samples': 1503, 'number_of_characters': 165720, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'snd_Deva-mai_Deva': {'num_samples': 1503, 'number_of_characters': 163447, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'snd_Deva-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 173877, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'snd_Deva-mar_Deva': {'num_samples': 1503, 'number_of_characters': 163786, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'snd_Deva-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 158355, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'snd_Deva-npi_Deva': {'num_samples': 1503, 'number_of_characters': 161945, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'snd_Deva-ory_Orya': {'num_samples': 1503, 'number_of_characters': 165262, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'snd_Deva-pan_Guru': {'num_samples': 1503, 'number_of_characters': 161240, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'snd_Deva-san_Deva': {'num_samples': 1503, 'number_of_characters': 159132, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'snd_Deva-sat_Olck': {'num_samples': 1503, 'number_of_characters': 170418, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'snd_Deva-tam_Taml': {'num_samples': 1503, 'number_of_characters': 175905, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'snd_Deva-tel_Telu': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'snd_Deva-urd_Arab': {'num_samples': 1503, 'number_of_characters': 162344, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 54.45, 'max_sentence1_length': 195, 'unique_sentence1': 1490, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'tam_Taml-asm_Beng': {'num_samples': 1503, 'number_of_characters': 174866, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'tam_Taml-ben_Beng': {'num_samples': 1503, 'number_of_characters': 169270, 'unique_pairs': 1501, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'tam_Taml-brx_Deva': {'num_samples': 1503, 'number_of_characters': 175326, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'tam_Taml-doi_Deva': {'num_samples': 1503, 'number_of_characters': 180314, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'tam_Taml-eng_Latn': {'num_samples': 1503, 'number_of_characters': 173998, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'tam_Taml-gom_Deva': {'num_samples': 1503, 'number_of_characters': 169564, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'tam_Taml-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 171551, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'tam_Taml-hin_Deva': {'num_samples': 1503, 'number_of_characters': 173246, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'tam_Taml-kan_Knda': {'num_samples': 1503, 'number_of_characters': 178459, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'tam_Taml-kas_Arab': {'num_samples': 1503, 'number_of_characters': 177963, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'tam_Taml-mai_Deva': {'num_samples': 1503, 'number_of_characters': 175690, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'tam_Taml-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 186120, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'tam_Taml-mar_Deva': {'num_samples': 1503, 'number_of_characters': 176029, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'tam_Taml-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 170598, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'tam_Taml-npi_Deva': {'num_samples': 1503, 'number_of_characters': 174188, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'tam_Taml-ory_Orya': {'num_samples': 1503, 'number_of_characters': 177505, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'tam_Taml-pan_Guru': {'num_samples': 1503, 'number_of_characters': 173483, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'tam_Taml-san_Deva': {'num_samples': 1503, 'number_of_characters': 171375, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'tam_Taml-sat_Olck': {'num_samples': 1503, 'number_of_characters': 182661, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'tam_Taml-snd_Deva': {'num_samples': 1503, 'number_of_characters': 175905, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'tam_Taml-tel_Telu': {'num_samples': 1503, 'number_of_characters': 170972, 'unique_pairs': 1502, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}, 'tam_Taml-urd_Arab': {'num_samples': 1503, 'number_of_characters': 174587, 'unique_pairs': 1503, 'min_sentence1_length': 3, 'average_sentence1_length': 62.59, 'max_sentence1_length': 224, 'unique_sentence1': 1492, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'tel_Telu-asm_Beng': {'num_samples': 1503, 'number_of_characters': 157690, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'tel_Telu-ben_Beng': {'num_samples': 1503, 'number_of_characters': 152094, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'tel_Telu-brx_Deva': {'num_samples': 1503, 'number_of_characters': 158150, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'tel_Telu-doi_Deva': {'num_samples': 1503, 'number_of_characters': 163138, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'tel_Telu-eng_Latn': {'num_samples': 1503, 'number_of_characters': 156822, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'tel_Telu-gom_Deva': {'num_samples': 1503, 'number_of_characters': 152388, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'tel_Telu-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 154375, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'tel_Telu-hin_Deva': {'num_samples': 1503, 'number_of_characters': 156070, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'tel_Telu-kan_Knda': {'num_samples': 1503, 'number_of_characters': 161283, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'tel_Telu-kas_Arab': {'num_samples': 1503, 'number_of_characters': 160787, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'tel_Telu-mai_Deva': {'num_samples': 1503, 'number_of_characters': 158514, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'tel_Telu-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 168944, 'unique_pairs': 1500, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'tel_Telu-mar_Deva': {'num_samples': 1503, 'number_of_characters': 158853, 'unique_pairs': 1503, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'tel_Telu-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 153422, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'tel_Telu-npi_Deva': {'num_samples': 1503, 'number_of_characters': 157012, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'tel_Telu-ory_Orya': {'num_samples': 1503, 'number_of_characters': 160329, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'tel_Telu-pan_Guru': {'num_samples': 1503, 'number_of_characters': 156307, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'tel_Telu-san_Deva': {'num_samples': 1503, 'number_of_characters': 154199, 'unique_pairs': 1501, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'tel_Telu-sat_Olck': {'num_samples': 1503, 'number_of_characters': 165485, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'tel_Telu-snd_Deva': {'num_samples': 1503, 'number_of_characters': 158729, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'tel_Telu-tam_Taml': {'num_samples': 1503, 'number_of_characters': 170972, 'unique_pairs': 1502, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'tel_Telu-urd_Arab': {'num_samples': 1503, 'number_of_characters': 157411, 'unique_pairs': 1499, 'min_sentence1_length': 6, 'average_sentence1_length': 51.16, 'max_sentence1_length': 182, 'unique_sentence1': 1495, 'min_sentence2_length': 4, 'average_sentence2_length': 53.57, 'max_sentence2_length': 206, 'unique_sentence2': 1498}, 'urd_Arab-asm_Beng': {'num_samples': 1503, 'number_of_characters': 161305, 'unique_pairs': 1498, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.75, 'max_sentence2_length': 208, 'unique_sentence2': 1497}, 'urd_Arab-ben_Beng': {'num_samples': 1503, 'number_of_characters': 155709, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.03, 'max_sentence2_length': 178, 'unique_sentence2': 1497}, 'urd_Arab-brx_Deva': {'num_samples': 1503, 'number_of_characters': 161765, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.06, 'max_sentence2_length': 210, 'unique_sentence2': 1498}, 'urd_Arab-doi_Deva': {'num_samples': 1503, 'number_of_characters': 166753, 'unique_pairs': 1500, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 57.38, 'max_sentence2_length': 209, 'unique_sentence2': 1499}, 'urd_Arab-eng_Latn': {'num_samples': 1503, 'number_of_characters': 160437, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.18, 'max_sentence2_length': 201, 'unique_sentence2': 1497}, 'urd_Arab-gom_Deva': {'num_samples': 1503, 'number_of_characters': 156003, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 50.23, 'max_sentence2_length': 203, 'unique_sentence2': 1500}, 'urd_Arab-guj_Gujr': {'num_samples': 1503, 'number_of_characters': 157990, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 51.55, 'max_sentence2_length': 205, 'unique_sentence2': 1500}, 'urd_Arab-hin_Deva': {'num_samples': 1503, 'number_of_characters': 159685, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.68, 'max_sentence2_length': 192, 'unique_sentence2': 1497}, 'urd_Arab-kan_Knda': {'num_samples': 1503, 'number_of_characters': 164898, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 56.14, 'max_sentence2_length': 201, 'unique_sentence2': 1499}, 'urd_Arab-kas_Arab': {'num_samples': 1503, 'number_of_characters': 164402, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 55.81, 'max_sentence2_length': 203, 'unique_sentence2': 1502}, 'urd_Arab-mai_Deva': {'num_samples': 1503, 'number_of_characters': 162129, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 54.3, 'max_sentence2_length': 230, 'unique_sentence2': 1499}, 'urd_Arab-mal_Mlym': {'num_samples': 1503, 'number_of_characters': 172559, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 5, 'average_sentence2_length': 61.24, 'max_sentence2_length': 219, 'unique_sentence2': 1495}, 'urd_Arab-mar_Deva': {'num_samples': 1503, 'number_of_characters': 162468, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.53, 'max_sentence2_length': 221, 'unique_sentence2': 1501}, 'urd_Arab-mni_Mtei': {'num_samples': 1503, 'number_of_characters': 157037, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 50.91, 'max_sentence2_length': 239, 'unique_sentence2': 1498}, 'urd_Arab-npi_Deva': {'num_samples': 1503, 'number_of_characters': 160627, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 53.3, 'max_sentence2_length': 223, 'unique_sentence2': 1497}, 'urd_Arab-ory_Orya': {'num_samples': 1503, 'number_of_characters': 163944, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 55.51, 'max_sentence2_length': 195, 'unique_sentence2': 1500}, 'urd_Arab-pan_Guru': {'num_samples': 1503, 'number_of_characters': 159922, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 52.83, 'max_sentence2_length': 221, 'unique_sentence2': 1495}, 'urd_Arab-san_Deva': {'num_samples': 1503, 'number_of_characters': 157814, 'unique_pairs': 1501, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 51.43, 'max_sentence2_length': 181, 'unique_sentence2': 1500}, 'urd_Arab-sat_Olck': {'num_samples': 1503, 'number_of_characters': 169100, 'unique_pairs': 1502, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 7, 'average_sentence2_length': 58.94, 'max_sentence2_length': 225, 'unique_sentence2': 1500}, 'urd_Arab-snd_Deva': {'num_samples': 1503, 'number_of_characters': 162344, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 4, 'average_sentence2_length': 54.45, 'max_sentence2_length': 195, 'unique_sentence2': 1490}, 'urd_Arab-tam_Taml': {'num_samples': 1503, 'number_of_characters': 174587, 'unique_pairs': 1503, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 3, 'average_sentence2_length': 62.59, 'max_sentence2_length': 224, 'unique_sentence2': 1492}, 'urd_Arab-tel_Telu': {'num_samples': 1503, 'number_of_characters': 157411, 'unique_pairs': 1499, 'min_sentence1_length': 4, 'average_sentence1_length': 53.57, 'max_sentence1_length': 206, 'unique_sentence1': 1498, 'min_sentence2_length': 6, 'average_sentence2_length': 51.16, 'max_sentence2_length': 182, 'unique_sentence2': 1495}}}}
IN22GenBitextMining (Jay Gala, 2023) ['asm', 'ben', 'brx', 'doi', 'eng', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] BitextMining s2s [Government, Legal, News, Non-fiction, Religious, Web, Written] {'test': 518144} {'test': {'num_samples': 518144, 'number_of_characters': 162367876, 'unique_pairs': 518101, 'min_sentence1_length': 9, 'average_sentence1_length': 156.68, 'max_sentence1_length': 692, 'unique_sentence1': 23550, 'min_sentence2_length': 9, 'average_sentence2_length': 156.68, 'max_sentence2_length': 692, 'unique_sentence2': 23550, 'hf_subset_descriptive_stats': {'asm_Beng-ben_Beng': {'num_samples': 1024, 'number_of_characters': 310622, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'asm_Beng-brx_Deva': {'num_samples': 1024, 'number_of_characters': 323609, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'asm_Beng-doi_Deva': {'num_samples': 1024, 'number_of_characters': 319020, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'asm_Beng-eng_Latn': {'num_samples': 1024, 'number_of_characters': 320098, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'asm_Beng-gom_Deva': {'num_samples': 1024, 'number_of_characters': 312594, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'asm_Beng-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 309440, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'asm_Beng-hin_Deva': {'num_samples': 1024, 'number_of_characters': 320106, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'asm_Beng-kan_Knda': {'num_samples': 1024, 'number_of_characters': 332064, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'asm_Beng-kas_Arab': {'num_samples': 1024, 'number_of_characters': 322764, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'asm_Beng-mai_Deva': {'num_samples': 1024, 'number_of_characters': 308682, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'asm_Beng-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 343636, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'asm_Beng-mar_Deva': {'num_samples': 1024, 'number_of_characters': 321784, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'asm_Beng-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 313134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'asm_Beng-npi_Deva': {'num_samples': 1024, 'number_of_characters': 313419, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'asm_Beng-ory_Orya': {'num_samples': 1024, 'number_of_characters': 334226, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'asm_Beng-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306863, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'asm_Beng-san_Deva': {'num_samples': 1024, 'number_of_characters': 318079, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'asm_Beng-sat_Olck': {'num_samples': 1024, 'number_of_characters': 326732, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'asm_Beng-snd_Deva': {'num_samples': 1024, 'number_of_characters': 320421, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'asm_Beng-tam_Taml': {'num_samples': 1024, 'number_of_characters': 348346, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'asm_Beng-tel_Telu': {'num_samples': 1024, 'number_of_characters': 319045, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'asm_Beng-urd_Arab': {'num_samples': 1024, 'number_of_characters': 315134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 156.7, 'max_sentence1_length': 582, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'ben_Beng-asm_Beng': {'num_samples': 1024, 'number_of_characters': 310622, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'ben_Beng-brx_Deva': {'num_samples': 1024, 'number_of_characters': 313313, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'ben_Beng-doi_Deva': {'num_samples': 1024, 'number_of_characters': 308724, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'ben_Beng-eng_Latn': {'num_samples': 1024, 'number_of_characters': 309802, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'ben_Beng-gom_Deva': {'num_samples': 1024, 'number_of_characters': 302298, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'ben_Beng-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 299144, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'ben_Beng-hin_Deva': {'num_samples': 1024, 'number_of_characters': 309810, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'ben_Beng-kan_Knda': {'num_samples': 1024, 'number_of_characters': 321768, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'ben_Beng-kas_Arab': {'num_samples': 1024, 'number_of_characters': 312468, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'ben_Beng-mai_Deva': {'num_samples': 1024, 'number_of_characters': 298386, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'ben_Beng-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 333340, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'ben_Beng-mar_Deva': {'num_samples': 1024, 'number_of_characters': 311488, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'ben_Beng-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 302838, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'ben_Beng-npi_Deva': {'num_samples': 1024, 'number_of_characters': 303123, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'ben_Beng-ory_Orya': {'num_samples': 1024, 'number_of_characters': 323930, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'ben_Beng-pan_Guru': {'num_samples': 1024, 'number_of_characters': 296567, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'ben_Beng-san_Deva': {'num_samples': 1024, 'number_of_characters': 307783, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'ben_Beng-sat_Olck': {'num_samples': 1024, 'number_of_characters': 316436, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'ben_Beng-snd_Deva': {'num_samples': 1024, 'number_of_characters': 310125, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'ben_Beng-tam_Taml': {'num_samples': 1024, 'number_of_characters': 338050, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'ben_Beng-tel_Telu': {'num_samples': 1024, 'number_of_characters': 308749, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'ben_Beng-urd_Arab': {'num_samples': 1024, 'number_of_characters': 304838, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 146.64, 'max_sentence1_length': 538, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'brx_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 323609, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'brx_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 313313, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'brx_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 321711, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'brx_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 322789, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'brx_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 315285, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'brx_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 312131, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'brx_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 322797, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'brx_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 334755, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'brx_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 325455, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'brx_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 311373, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'brx_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 346327, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'brx_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 324475, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'brx_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 315825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'brx_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 316110, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'brx_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 336917, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'brx_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 309554, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'brx_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 320770, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'brx_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 329423, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'brx_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 323112, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'brx_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 351037, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'brx_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 321736, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'brx_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 317825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 159.33, 'max_sentence1_length': 631, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'doi_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 319020, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'doi_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 308724, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'doi_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 321711, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'doi_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 318200, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'doi_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 310696, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'doi_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 307542, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'doi_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 318208, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'doi_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 330166, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'doi_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 320866, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'doi_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 306784, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'doi_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 341738, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'doi_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 319886, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'doi_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 311236, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'doi_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 311521, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'doi_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 332328, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'doi_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304965, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'doi_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 316181, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'doi_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 324834, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'doi_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 318523, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'doi_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 346448, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'doi_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 317147, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'doi_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 313236, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.84, 'max_sentence1_length': 500, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'eng_Latn-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320098, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'eng_Latn-ben_Beng': {'num_samples': 1024, 'number_of_characters': 309802, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'eng_Latn-brx_Deva': {'num_samples': 1024, 'number_of_characters': 322789, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'eng_Latn-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318200, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'eng_Latn-gom_Deva': {'num_samples': 1024, 'number_of_characters': 311774, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'eng_Latn-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308620, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'eng_Latn-hin_Deva': {'num_samples': 1024, 'number_of_characters': 319286, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'eng_Latn-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331244, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'eng_Latn-kas_Arab': {'num_samples': 1024, 'number_of_characters': 321944, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'eng_Latn-mai_Deva': {'num_samples': 1024, 'number_of_characters': 307862, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'eng_Latn-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 342816, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'eng_Latn-mar_Deva': {'num_samples': 1024, 'number_of_characters': 320964, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'eng_Latn-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312314, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'eng_Latn-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312599, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'eng_Latn-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333406, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'eng_Latn-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306043, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'eng_Latn-san_Deva': {'num_samples': 1024, 'number_of_characters': 317259, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'eng_Latn-sat_Olck': {'num_samples': 1024, 'number_of_characters': 325912, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'eng_Latn-snd_Deva': {'num_samples': 1024, 'number_of_characters': 319601, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'eng_Latn-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347526, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'eng_Latn-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318225, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'eng_Latn-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314314, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 155.9, 'max_sentence1_length': 532, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'gom_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 312594, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'gom_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 302298, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'gom_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 315285, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'gom_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 310696, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'gom_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 311774, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'gom_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301116, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'gom_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 311782, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'gom_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 323740, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'gom_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 314440, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'gom_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 300358, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'gom_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 335312, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'gom_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 313460, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'gom_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 304810, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'gom_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 305095, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'gom_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 325902, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'gom_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 298539, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'gom_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 309755, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'gom_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 318408, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'gom_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 312097, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'gom_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340022, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'gom_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 310721, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'gom_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 306810, 'unique_pairs': 1024, 'min_sentence1_length': 17, 'average_sentence1_length': 148.57, 'max_sentence1_length': 537, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'guj_Gujr-asm_Beng': {'num_samples': 1024, 'number_of_characters': 309440, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'guj_Gujr-ben_Beng': {'num_samples': 1024, 'number_of_characters': 299144, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'guj_Gujr-brx_Deva': {'num_samples': 1024, 'number_of_characters': 312131, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'guj_Gujr-doi_Deva': {'num_samples': 1024, 'number_of_characters': 307542, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'guj_Gujr-eng_Latn': {'num_samples': 1024, 'number_of_characters': 308620, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'guj_Gujr-gom_Deva': {'num_samples': 1024, 'number_of_characters': 301116, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'guj_Gujr-hin_Deva': {'num_samples': 1024, 'number_of_characters': 308628, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'guj_Gujr-kan_Knda': {'num_samples': 1024, 'number_of_characters': 320586, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'guj_Gujr-kas_Arab': {'num_samples': 1024, 'number_of_characters': 311286, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'guj_Gujr-mai_Deva': {'num_samples': 1024, 'number_of_characters': 297204, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'guj_Gujr-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 332158, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'guj_Gujr-mar_Deva': {'num_samples': 1024, 'number_of_characters': 310306, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'guj_Gujr-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 301656, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'guj_Gujr-npi_Deva': {'num_samples': 1024, 'number_of_characters': 301941, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'guj_Gujr-ory_Orya': {'num_samples': 1024, 'number_of_characters': 322748, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'guj_Gujr-pan_Guru': {'num_samples': 1024, 'number_of_characters': 295385, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'guj_Gujr-san_Deva': {'num_samples': 1024, 'number_of_characters': 306601, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'guj_Gujr-sat_Olck': {'num_samples': 1024, 'number_of_characters': 315254, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'guj_Gujr-snd_Deva': {'num_samples': 1024, 'number_of_characters': 308943, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'guj_Gujr-tam_Taml': {'num_samples': 1024, 'number_of_characters': 336868, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'guj_Gujr-tel_Telu': {'num_samples': 1024, 'number_of_characters': 307567, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'guj_Gujr-urd_Arab': {'num_samples': 1024, 'number_of_characters': 303656, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 145.49, 'max_sentence1_length': 488, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'hin_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320106, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'hin_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 309810, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'hin_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 322797, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'hin_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318208, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'hin_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 319286, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'hin_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 311782, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'hin_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308628, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'hin_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331252, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'hin_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 321952, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'hin_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 307870, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'hin_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 342824, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'hin_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 320972, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'hin_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312322, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'hin_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312607, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'hin_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333414, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'hin_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306051, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'hin_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 317267, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'hin_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 325920, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'hin_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 319609, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'hin_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347534, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'hin_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318233, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'hin_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314322, 'unique_pairs': 1024, 'min_sentence1_length': 21, 'average_sentence1_length': 155.91, 'max_sentence1_length': 531, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'kan_Knda-asm_Beng': {'num_samples': 1024, 'number_of_characters': 332064, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'kan_Knda-ben_Beng': {'num_samples': 1024, 'number_of_characters': 321768, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'kan_Knda-brx_Deva': {'num_samples': 1024, 'number_of_characters': 334755, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'kan_Knda-doi_Deva': {'num_samples': 1024, 'number_of_characters': 330166, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'kan_Knda-eng_Latn': {'num_samples': 1024, 'number_of_characters': 331244, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'kan_Knda-gom_Deva': {'num_samples': 1024, 'number_of_characters': 323740, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'kan_Knda-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 320586, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'kan_Knda-hin_Deva': {'num_samples': 1024, 'number_of_characters': 331252, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'kan_Knda-kas_Arab': {'num_samples': 1024, 'number_of_characters': 333910, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'kan_Knda-mai_Deva': {'num_samples': 1024, 'number_of_characters': 319828, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'kan_Knda-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 354782, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'kan_Knda-mar_Deva': {'num_samples': 1024, 'number_of_characters': 332930, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'kan_Knda-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 324280, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'kan_Knda-npi_Deva': {'num_samples': 1024, 'number_of_characters': 324565, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'kan_Knda-ory_Orya': {'num_samples': 1024, 'number_of_characters': 345372, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'kan_Knda-pan_Guru': {'num_samples': 1024, 'number_of_characters': 318009, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'kan_Knda-san_Deva': {'num_samples': 1024, 'number_of_characters': 329225, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'kan_Knda-sat_Olck': {'num_samples': 1024, 'number_of_characters': 337878, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'kan_Knda-snd_Deva': {'num_samples': 1024, 'number_of_characters': 331567, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'kan_Knda-tam_Taml': {'num_samples': 1024, 'number_of_characters': 359492, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'kan_Knda-tel_Telu': {'num_samples': 1024, 'number_of_characters': 330191, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'kan_Knda-urd_Arab': {'num_samples': 1024, 'number_of_characters': 326280, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 167.58, 'max_sentence1_length': 668, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'kas_Arab-asm_Beng': {'num_samples': 1024, 'number_of_characters': 322764, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'kas_Arab-ben_Beng': {'num_samples': 1024, 'number_of_characters': 312468, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'kas_Arab-brx_Deva': {'num_samples': 1024, 'number_of_characters': 325455, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'kas_Arab-doi_Deva': {'num_samples': 1024, 'number_of_characters': 320866, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'kas_Arab-eng_Latn': {'num_samples': 1024, 'number_of_characters': 321944, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'kas_Arab-gom_Deva': {'num_samples': 1024, 'number_of_characters': 314440, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'kas_Arab-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 311286, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'kas_Arab-hin_Deva': {'num_samples': 1024, 'number_of_characters': 321952, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'kas_Arab-kan_Knda': {'num_samples': 1024, 'number_of_characters': 333910, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'kas_Arab-mai_Deva': {'num_samples': 1024, 'number_of_characters': 310528, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'kas_Arab-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 345482, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'kas_Arab-mar_Deva': {'num_samples': 1024, 'number_of_characters': 323630, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'kas_Arab-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 314980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'kas_Arab-npi_Deva': {'num_samples': 1024, 'number_of_characters': 315265, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'kas_Arab-ory_Orya': {'num_samples': 1024, 'number_of_characters': 336072, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'kas_Arab-pan_Guru': {'num_samples': 1024, 'number_of_characters': 308709, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'kas_Arab-san_Deva': {'num_samples': 1024, 'number_of_characters': 319925, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'kas_Arab-sat_Olck': {'num_samples': 1024, 'number_of_characters': 328578, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'kas_Arab-snd_Deva': {'num_samples': 1024, 'number_of_characters': 322267, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'kas_Arab-tam_Taml': {'num_samples': 1024, 'number_of_characters': 350192, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'kas_Arab-tel_Telu': {'num_samples': 1024, 'number_of_characters': 320891, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'kas_Arab-urd_Arab': {'num_samples': 1024, 'number_of_characters': 316980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 158.5, 'max_sentence1_length': 520, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mai_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 308682, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mai_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 298386, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mai_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 311373, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mai_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 306784, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mai_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 307862, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mai_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 300358, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mai_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 297204, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mai_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 307870, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mai_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 319828, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mai_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 310528, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mai_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 331400, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mai_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 309548, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mai_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 300898, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mai_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 301183, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mai_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 321990, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mai_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 294627, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mai_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 305843, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mai_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 314496, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mai_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 308185, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mai_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 336110, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mai_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 306809, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mai_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 302898, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 144.75, 'max_sentence1_length': 562, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mal_Mlym-asm_Beng': {'num_samples': 1024, 'number_of_characters': 343636, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mal_Mlym-ben_Beng': {'num_samples': 1024, 'number_of_characters': 333340, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mal_Mlym-brx_Deva': {'num_samples': 1024, 'number_of_characters': 346327, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mal_Mlym-doi_Deva': {'num_samples': 1024, 'number_of_characters': 341738, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mal_Mlym-eng_Latn': {'num_samples': 1024, 'number_of_characters': 342816, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mal_Mlym-gom_Deva': {'num_samples': 1024, 'number_of_characters': 335312, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mal_Mlym-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 332158, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mal_Mlym-hin_Deva': {'num_samples': 1024, 'number_of_characters': 342824, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mal_Mlym-kan_Knda': {'num_samples': 1024, 'number_of_characters': 354782, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mal_Mlym-kas_Arab': {'num_samples': 1024, 'number_of_characters': 345482, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mal_Mlym-mai_Deva': {'num_samples': 1024, 'number_of_characters': 331400, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mal_Mlym-mar_Deva': {'num_samples': 1024, 'number_of_characters': 344502, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mal_Mlym-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 335852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mal_Mlym-npi_Deva': {'num_samples': 1024, 'number_of_characters': 336137, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mal_Mlym-ory_Orya': {'num_samples': 1024, 'number_of_characters': 356944, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mal_Mlym-pan_Guru': {'num_samples': 1024, 'number_of_characters': 329581, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mal_Mlym-san_Deva': {'num_samples': 1024, 'number_of_characters': 340797, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mal_Mlym-sat_Olck': {'num_samples': 1024, 'number_of_characters': 349450, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mal_Mlym-snd_Deva': {'num_samples': 1024, 'number_of_characters': 343139, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mal_Mlym-tam_Taml': {'num_samples': 1024, 'number_of_characters': 371064, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mal_Mlym-tel_Telu': {'num_samples': 1024, 'number_of_characters': 341763, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mal_Mlym-urd_Arab': {'num_samples': 1024, 'number_of_characters': 337852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 178.88, 'max_sentence1_length': 692, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mar_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 321784, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mar_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 311488, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mar_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 324475, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mar_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 319886, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mar_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 320964, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mar_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 313460, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mar_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 310306, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mar_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 320972, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mar_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 332930, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mar_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 323630, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mar_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 309548, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mar_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 344502, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mar_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 314000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'mar_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 314285, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mar_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 335092, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mar_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 307729, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mar_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 318945, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mar_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 327598, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mar_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 321287, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mar_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 349212, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mar_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 319911, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mar_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 316000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 157.54, 'max_sentence1_length': 555, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'mni_Mtei-asm_Beng': {'num_samples': 1024, 'number_of_characters': 313134, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'mni_Mtei-ben_Beng': {'num_samples': 1024, 'number_of_characters': 302838, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'mni_Mtei-brx_Deva': {'num_samples': 1024, 'number_of_characters': 315825, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'mni_Mtei-doi_Deva': {'num_samples': 1024, 'number_of_characters': 311236, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'mni_Mtei-eng_Latn': {'num_samples': 1024, 'number_of_characters': 312314, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'mni_Mtei-gom_Deva': {'num_samples': 1024, 'number_of_characters': 304810, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'mni_Mtei-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301656, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'mni_Mtei-hin_Deva': {'num_samples': 1024, 'number_of_characters': 312322, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'mni_Mtei-kan_Knda': {'num_samples': 1024, 'number_of_characters': 324280, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'mni_Mtei-kas_Arab': {'num_samples': 1024, 'number_of_characters': 314980, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'mni_Mtei-mai_Deva': {'num_samples': 1024, 'number_of_characters': 300898, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'mni_Mtei-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 335852, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'mni_Mtei-mar_Deva': {'num_samples': 1024, 'number_of_characters': 314000, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'mni_Mtei-npi_Deva': {'num_samples': 1024, 'number_of_characters': 305635, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'mni_Mtei-ory_Orya': {'num_samples': 1024, 'number_of_characters': 326442, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'mni_Mtei-pan_Guru': {'num_samples': 1024, 'number_of_characters': 299079, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'mni_Mtei-san_Deva': {'num_samples': 1024, 'number_of_characters': 310295, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'mni_Mtei-sat_Olck': {'num_samples': 1024, 'number_of_characters': 318948, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'mni_Mtei-snd_Deva': {'num_samples': 1024, 'number_of_characters': 312637, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'mni_Mtei-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340562, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'mni_Mtei-tel_Telu': {'num_samples': 1024, 'number_of_characters': 311261, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'mni_Mtei-urd_Arab': {'num_samples': 1024, 'number_of_characters': 307350, 'unique_pairs': 1024, 'min_sentence1_length': 16, 'average_sentence1_length': 149.1, 'max_sentence1_length': 597, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'npi_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 313419, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'npi_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 303123, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'npi_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 316110, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'npi_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 311521, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'npi_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 312599, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'npi_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 305095, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'npi_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 301941, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'npi_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 312607, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'npi_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 324565, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'npi_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 315265, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'npi_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 301183, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'npi_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 336137, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'npi_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 314285, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'npi_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 305635, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'npi_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 326727, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'npi_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 299364, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'npi_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 310580, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'npi_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 319233, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'npi_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 312922, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'npi_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 340847, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'npi_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 311546, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'npi_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 307635, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 149.38, 'max_sentence1_length': 525, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'ory_Orya-asm_Beng': {'num_samples': 1024, 'number_of_characters': 334226, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'ory_Orya-ben_Beng': {'num_samples': 1024, 'number_of_characters': 323930, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'ory_Orya-brx_Deva': {'num_samples': 1024, 'number_of_characters': 336917, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'ory_Orya-doi_Deva': {'num_samples': 1024, 'number_of_characters': 332328, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'ory_Orya-eng_Latn': {'num_samples': 1024, 'number_of_characters': 333406, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'ory_Orya-gom_Deva': {'num_samples': 1024, 'number_of_characters': 325902, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'ory_Orya-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 322748, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'ory_Orya-hin_Deva': {'num_samples': 1024, 'number_of_characters': 333414, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'ory_Orya-kan_Knda': {'num_samples': 1024, 'number_of_characters': 345372, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'ory_Orya-kas_Arab': {'num_samples': 1024, 'number_of_characters': 336072, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'ory_Orya-mai_Deva': {'num_samples': 1024, 'number_of_characters': 321990, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'ory_Orya-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 356944, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'ory_Orya-mar_Deva': {'num_samples': 1024, 'number_of_characters': 335092, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'ory_Orya-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 326442, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'ory_Orya-npi_Deva': {'num_samples': 1024, 'number_of_characters': 326727, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'ory_Orya-pan_Guru': {'num_samples': 1024, 'number_of_characters': 320171, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'ory_Orya-san_Deva': {'num_samples': 1024, 'number_of_characters': 331387, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'ory_Orya-sat_Olck': {'num_samples': 1024, 'number_of_characters': 340040, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'ory_Orya-snd_Deva': {'num_samples': 1024, 'number_of_characters': 333729, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'ory_Orya-tam_Taml': {'num_samples': 1024, 'number_of_characters': 361654, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'ory_Orya-tel_Telu': {'num_samples': 1024, 'number_of_characters': 332353, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'ory_Orya-urd_Arab': {'num_samples': 1024, 'number_of_characters': 328442, 'unique_pairs': 1024, 'min_sentence1_length': 10, 'average_sentence1_length': 169.69, 'max_sentence1_length': 578, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'pan_Guru-asm_Beng': {'num_samples': 1024, 'number_of_characters': 306863, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'pan_Guru-ben_Beng': {'num_samples': 1024, 'number_of_characters': 296567, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'pan_Guru-brx_Deva': {'num_samples': 1024, 'number_of_characters': 309554, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'pan_Guru-doi_Deva': {'num_samples': 1024, 'number_of_characters': 304965, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'pan_Guru-eng_Latn': {'num_samples': 1024, 'number_of_characters': 306043, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'pan_Guru-gom_Deva': {'num_samples': 1024, 'number_of_characters': 298539, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'pan_Guru-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 295385, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'pan_Guru-hin_Deva': {'num_samples': 1024, 'number_of_characters': 306051, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'pan_Guru-kan_Knda': {'num_samples': 1024, 'number_of_characters': 318009, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'pan_Guru-kas_Arab': {'num_samples': 1024, 'number_of_characters': 308709, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'pan_Guru-mai_Deva': {'num_samples': 1024, 'number_of_characters': 294627, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'pan_Guru-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 329581, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'pan_Guru-mar_Deva': {'num_samples': 1024, 'number_of_characters': 307729, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'pan_Guru-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 299079, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'pan_Guru-npi_Deva': {'num_samples': 1024, 'number_of_characters': 299364, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'pan_Guru-ory_Orya': {'num_samples': 1024, 'number_of_characters': 320171, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'pan_Guru-san_Deva': {'num_samples': 1024, 'number_of_characters': 304024, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'pan_Guru-sat_Olck': {'num_samples': 1024, 'number_of_characters': 312677, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'pan_Guru-snd_Deva': {'num_samples': 1024, 'number_of_characters': 306366, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'pan_Guru-tam_Taml': {'num_samples': 1024, 'number_of_characters': 334291, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'pan_Guru-tel_Telu': {'num_samples': 1024, 'number_of_characters': 304990, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'pan_Guru-urd_Arab': {'num_samples': 1024, 'number_of_characters': 301079, 'unique_pairs': 1024, 'min_sentence1_length': 19, 'average_sentence1_length': 142.97, 'max_sentence1_length': 476, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'san_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 318079, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'san_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 307783, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'san_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 320770, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'san_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 316181, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'san_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 317259, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'san_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 309755, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'san_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 306601, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'san_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 317267, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'san_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 329225, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'san_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 319925, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'san_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 305843, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'san_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 340797, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'san_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 318945, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'san_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 310295, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'san_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 310580, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'san_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 331387, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'san_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304024, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'san_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 323893, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'san_Deva-snd_Deva': {'num_samples': 1024, 'number_of_characters': 317582, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'san_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 345507, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'san_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 316206, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'san_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 312295, 'unique_pairs': 1024, 'min_sentence1_length': 9, 'average_sentence1_length': 153.93, 'max_sentence1_length': 601, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'sat_Olck-asm_Beng': {'num_samples': 1024, 'number_of_characters': 326732, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'sat_Olck-ben_Beng': {'num_samples': 1024, 'number_of_characters': 316436, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'sat_Olck-brx_Deva': {'num_samples': 1024, 'number_of_characters': 329423, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'sat_Olck-doi_Deva': {'num_samples': 1024, 'number_of_characters': 324834, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'sat_Olck-eng_Latn': {'num_samples': 1024, 'number_of_characters': 325912, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'sat_Olck-gom_Deva': {'num_samples': 1024, 'number_of_characters': 318408, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'sat_Olck-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 315254, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'sat_Olck-hin_Deva': {'num_samples': 1024, 'number_of_characters': 325920, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'sat_Olck-kan_Knda': {'num_samples': 1024, 'number_of_characters': 337878, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'sat_Olck-kas_Arab': {'num_samples': 1024, 'number_of_characters': 328578, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'sat_Olck-mai_Deva': {'num_samples': 1024, 'number_of_characters': 314496, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'sat_Olck-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 349450, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'sat_Olck-mar_Deva': {'num_samples': 1024, 'number_of_characters': 327598, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'sat_Olck-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 318948, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'sat_Olck-npi_Deva': {'num_samples': 1024, 'number_of_characters': 319233, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'sat_Olck-ory_Orya': {'num_samples': 1024, 'number_of_characters': 340040, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'sat_Olck-pan_Guru': {'num_samples': 1024, 'number_of_characters': 312677, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'sat_Olck-san_Deva': {'num_samples': 1024, 'number_of_characters': 323893, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'sat_Olck-snd_Deva': {'num_samples': 1024, 'number_of_characters': 326235, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'sat_Olck-tam_Taml': {'num_samples': 1024, 'number_of_characters': 354160, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'sat_Olck-tel_Telu': {'num_samples': 1024, 'number_of_characters': 324859, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'sat_Olck-urd_Arab': {'num_samples': 1024, 'number_of_characters': 320948, 'unique_pairs': 1024, 'min_sentence1_length': 11, 'average_sentence1_length': 162.38, 'max_sentence1_length': 536, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'snd_Deva-asm_Beng': {'num_samples': 1024, 'number_of_characters': 320421, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'snd_Deva-ben_Beng': {'num_samples': 1024, 'number_of_characters': 310125, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'snd_Deva-brx_Deva': {'num_samples': 1024, 'number_of_characters': 323112, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'snd_Deva-doi_Deva': {'num_samples': 1024, 'number_of_characters': 318523, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'snd_Deva-eng_Latn': {'num_samples': 1024, 'number_of_characters': 319601, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'snd_Deva-gom_Deva': {'num_samples': 1024, 'number_of_characters': 312097, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'snd_Deva-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 308943, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'snd_Deva-hin_Deva': {'num_samples': 1024, 'number_of_characters': 319609, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'snd_Deva-kan_Knda': {'num_samples': 1024, 'number_of_characters': 331567, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'snd_Deva-kas_Arab': {'num_samples': 1024, 'number_of_characters': 322267, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'snd_Deva-mai_Deva': {'num_samples': 1024, 'number_of_characters': 308185, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'snd_Deva-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 343139, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'snd_Deva-mar_Deva': {'num_samples': 1024, 'number_of_characters': 321287, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'snd_Deva-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 312637, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'snd_Deva-npi_Deva': {'num_samples': 1024, 'number_of_characters': 312922, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'snd_Deva-ory_Orya': {'num_samples': 1024, 'number_of_characters': 333729, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'snd_Deva-pan_Guru': {'num_samples': 1024, 'number_of_characters': 306366, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'snd_Deva-san_Deva': {'num_samples': 1024, 'number_of_characters': 317582, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'snd_Deva-sat_Olck': {'num_samples': 1024, 'number_of_characters': 326235, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'snd_Deva-tam_Taml': {'num_samples': 1024, 'number_of_characters': 347849, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'snd_Deva-tel_Telu': {'num_samples': 1024, 'number_of_characters': 318548, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'snd_Deva-urd_Arab': {'num_samples': 1024, 'number_of_characters': 314637, 'unique_pairs': 1024, 'min_sentence1_length': 18, 'average_sentence1_length': 156.21, 'max_sentence1_length': 545, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'tam_Taml-asm_Beng': {'num_samples': 1024, 'number_of_characters': 348346, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'tam_Taml-ben_Beng': {'num_samples': 1024, 'number_of_characters': 338050, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'tam_Taml-brx_Deva': {'num_samples': 1024, 'number_of_characters': 351037, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'tam_Taml-doi_Deva': {'num_samples': 1024, 'number_of_characters': 346448, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'tam_Taml-eng_Latn': {'num_samples': 1024, 'number_of_characters': 347526, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'tam_Taml-gom_Deva': {'num_samples': 1024, 'number_of_characters': 340022, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'tam_Taml-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 336868, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'tam_Taml-hin_Deva': {'num_samples': 1024, 'number_of_characters': 347534, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'tam_Taml-kan_Knda': {'num_samples': 1024, 'number_of_characters': 359492, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'tam_Taml-kas_Arab': {'num_samples': 1024, 'number_of_characters': 350192, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'tam_Taml-mai_Deva': {'num_samples': 1024, 'number_of_characters': 336110, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'tam_Taml-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 371064, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'tam_Taml-mar_Deva': {'num_samples': 1024, 'number_of_characters': 349212, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'tam_Taml-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 340562, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'tam_Taml-npi_Deva': {'num_samples': 1024, 'number_of_characters': 340847, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'tam_Taml-ory_Orya': {'num_samples': 1024, 'number_of_characters': 361654, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'tam_Taml-pan_Guru': {'num_samples': 1024, 'number_of_characters': 334291, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'tam_Taml-san_Deva': {'num_samples': 1024, 'number_of_characters': 345507, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'tam_Taml-sat_Olck': {'num_samples': 1024, 'number_of_characters': 354160, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'tam_Taml-snd_Deva': {'num_samples': 1024, 'number_of_characters': 347849, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'tam_Taml-tel_Telu': {'num_samples': 1024, 'number_of_characters': 346473, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}, 'tam_Taml-urd_Arab': {'num_samples': 1024, 'number_of_characters': 342562, 'unique_pairs': 1024, 'min_sentence1_length': 32, 'average_sentence1_length': 183.48, 'max_sentence1_length': 614, 'unique_sentence1': 1023, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'tel_Telu-asm_Beng': {'num_samples': 1024, 'number_of_characters': 319045, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'tel_Telu-ben_Beng': {'num_samples': 1024, 'number_of_characters': 308749, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'tel_Telu-brx_Deva': {'num_samples': 1024, 'number_of_characters': 321736, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'tel_Telu-doi_Deva': {'num_samples': 1024, 'number_of_characters': 317147, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'tel_Telu-eng_Latn': {'num_samples': 1024, 'number_of_characters': 318225, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'tel_Telu-gom_Deva': {'num_samples': 1024, 'number_of_characters': 310721, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'tel_Telu-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 307567, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'tel_Telu-hin_Deva': {'num_samples': 1024, 'number_of_characters': 318233, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'tel_Telu-kan_Knda': {'num_samples': 1024, 'number_of_characters': 330191, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'tel_Telu-kas_Arab': {'num_samples': 1024, 'number_of_characters': 320891, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'tel_Telu-mai_Deva': {'num_samples': 1024, 'number_of_characters': 306809, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'tel_Telu-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 341763, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'tel_Telu-mar_Deva': {'num_samples': 1024, 'number_of_characters': 319911, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'tel_Telu-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 311261, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'tel_Telu-npi_Deva': {'num_samples': 1024, 'number_of_characters': 311546, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'tel_Telu-ory_Orya': {'num_samples': 1024, 'number_of_characters': 332353, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'tel_Telu-pan_Guru': {'num_samples': 1024, 'number_of_characters': 304990, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'tel_Telu-san_Deva': {'num_samples': 1024, 'number_of_characters': 316206, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'tel_Telu-sat_Olck': {'num_samples': 1024, 'number_of_characters': 324859, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'tel_Telu-snd_Deva': {'num_samples': 1024, 'number_of_characters': 318548, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'tel_Telu-tam_Taml': {'num_samples': 1024, 'number_of_characters': 346473, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'tel_Telu-urd_Arab': {'num_samples': 1024, 'number_of_characters': 313261, 'unique_pairs': 1024, 'min_sentence1_length': 14, 'average_sentence1_length': 154.87, 'max_sentence1_length': 658, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 151.05, 'max_sentence2_length': 574, 'unique_sentence2': 1024}, 'urd_Arab-asm_Beng': {'num_samples': 1024, 'number_of_characters': 315134, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 156.7, 'max_sentence2_length': 582, 'unique_sentence2': 1024}, 'urd_Arab-ben_Beng': {'num_samples': 1024, 'number_of_characters': 304838, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 146.64, 'max_sentence2_length': 538, 'unique_sentence2': 1024}, 'urd_Arab-brx_Deva': {'num_samples': 1024, 'number_of_characters': 317825, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 159.33, 'max_sentence2_length': 631, 'unique_sentence2': 1024}, 'urd_Arab-doi_Deva': {'num_samples': 1024, 'number_of_characters': 313236, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.84, 'max_sentence2_length': 500, 'unique_sentence2': 1024}, 'urd_Arab-eng_Latn': {'num_samples': 1024, 'number_of_characters': 314314, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 155.9, 'max_sentence2_length': 532, 'unique_sentence2': 1024}, 'urd_Arab-gom_Deva': {'num_samples': 1024, 'number_of_characters': 306810, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 17, 'average_sentence2_length': 148.57, 'max_sentence2_length': 537, 'unique_sentence2': 1024}, 'urd_Arab-guj_Gujr': {'num_samples': 1024, 'number_of_characters': 303656, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 145.49, 'max_sentence2_length': 488, 'unique_sentence2': 1024}, 'urd_Arab-hin_Deva': {'num_samples': 1024, 'number_of_characters': 314322, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 21, 'average_sentence2_length': 155.91, 'max_sentence2_length': 531, 'unique_sentence2': 1024}, 'urd_Arab-kan_Knda': {'num_samples': 1024, 'number_of_characters': 326280, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 167.58, 'max_sentence2_length': 668, 'unique_sentence2': 1024}, 'urd_Arab-kas_Arab': {'num_samples': 1024, 'number_of_characters': 316980, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 158.5, 'max_sentence2_length': 520, 'unique_sentence2': 1024}, 'urd_Arab-mai_Deva': {'num_samples': 1024, 'number_of_characters': 302898, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 144.75, 'max_sentence2_length': 562, 'unique_sentence2': 1024}, 'urd_Arab-mal_Mlym': {'num_samples': 1024, 'number_of_characters': 337852, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 13, 'average_sentence2_length': 178.88, 'max_sentence2_length': 692, 'unique_sentence2': 1024}, 'urd_Arab-mar_Deva': {'num_samples': 1024, 'number_of_characters': 316000, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 157.54, 'max_sentence2_length': 555, 'unique_sentence2': 1024}, 'urd_Arab-mni_Mtei': {'num_samples': 1024, 'number_of_characters': 307350, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 16, 'average_sentence2_length': 149.1, 'max_sentence2_length': 597, 'unique_sentence2': 1024}, 'urd_Arab-npi_Deva': {'num_samples': 1024, 'number_of_characters': 307635, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 149.38, 'max_sentence2_length': 525, 'unique_sentence2': 1024}, 'urd_Arab-ory_Orya': {'num_samples': 1024, 'number_of_characters': 328442, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 10, 'average_sentence2_length': 169.69, 'max_sentence2_length': 578, 'unique_sentence2': 1024}, 'urd_Arab-pan_Guru': {'num_samples': 1024, 'number_of_characters': 301079, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 19, 'average_sentence2_length': 142.97, 'max_sentence2_length': 476, 'unique_sentence2': 1024}, 'urd_Arab-san_Deva': {'num_samples': 1024, 'number_of_characters': 312295, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 9, 'average_sentence2_length': 153.93, 'max_sentence2_length': 601, 'unique_sentence2': 1024}, 'urd_Arab-sat_Olck': {'num_samples': 1024, 'number_of_characters': 320948, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 11, 'average_sentence2_length': 162.38, 'max_sentence2_length': 536, 'unique_sentence2': 1024}, 'urd_Arab-snd_Deva': {'num_samples': 1024, 'number_of_characters': 314637, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 18, 'average_sentence2_length': 156.21, 'max_sentence2_length': 545, 'unique_sentence2': 1024}, 'urd_Arab-tam_Taml': {'num_samples': 1024, 'number_of_characters': 342562, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 32, 'average_sentence2_length': 183.48, 'max_sentence2_length': 614, 'unique_sentence2': 1023}, 'urd_Arab-tel_Telu': {'num_samples': 1024, 'number_of_characters': 313261, 'unique_pairs': 1024, 'min_sentence1_length': 13, 'average_sentence1_length': 151.05, 'max_sentence1_length': 574, 'unique_sentence1': 1024, 'min_sentence2_length': 14, 'average_sentence2_length': 154.87, 'max_sentence2_length': 658, 'unique_sentence2': 1024}}}}
IWSLT2017BitextMining ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'jpn', 'kor', 'nld', 'ron'] BitextMining s2s [Fiction, Non-fiction, Written] {'validation': 21938} {'validation': {'num_samples': 21938, 'number_of_characters': 4256244, 'unique_pairs': 21840, 'min_sentence1_length': 2, 'average_sentence1_length': 97.01, 'max_sentence1_length': 521, 'unique_sentence1': 11563, 'min_sentence2_length': 2, 'average_sentence2_length': 97.01, 'max_sentence2_length': 521, 'unique_sentence2': 11563, 'hf_subset_descriptive_stats': {'ar-en': {'num_samples': 888, 'number_of_characters': 172499, 'unique_pairs': 887, 'min_sentence1_length': 4, 'average_sentence1_length': 85.49, 'max_sentence1_length': 369, 'unique_sentence1': 887, 'min_sentence2_length': 10, 'average_sentence2_length': 108.77, 'max_sentence2_length': 462, 'unique_sentence2': 881}, 'de-en': {'num_samples': 888, 'number_of_characters': 202336, 'unique_pairs': 883, 'min_sentence1_length': 6, 'average_sentence1_length': 119.03, 'max_sentence1_length': 521, 'unique_sentence1': 881, 'min_sentence2_length': 10, 'average_sentence2_length': 108.83, 'max_sentence2_length': 462, 'unique_sentence2': 881}, 'en-ar': {'num_samples': 888, 'number_of_characters': 172499, 'unique_pairs': 887, 'min_sentence1_length': 10, 'average_sentence1_length': 108.77, 'max_sentence1_length': 462, 'unique_sentence1': 881, 'min_sentence2_length': 4, 'average_sentence2_length': 85.49, 'max_sentence2_length': 369, 'unique_sentence2': 887}, 'en-de': {'num_samples': 888, 'number_of_characters': 202336, 'unique_pairs': 883, 'min_sentence1_length': 10, 'average_sentence1_length': 108.83, 'max_sentence1_length': 462, 'unique_sentence1': 881, 'min_sentence2_length': 6, 'average_sentence2_length': 119.03, 'max_sentence2_length': 521, 'unique_sentence2': 881}, 'en-fr': {'num_samples': 890, 'number_of_characters': 197619, 'unique_pairs': 883, 'min_sentence1_length': 10, 'average_sentence1_length': 108.41, 'max_sentence1_length': 462, 'unique_sentence1': 883, 'min_sentence2_length': 6, 'average_sentence2_length': 113.63, 'max_sentence2_length': 493, 'unique_sentence2': 881}, 'en-it': {'num_samples': 929, 'number_of_characters': 191803, 'unique_pairs': 924, 'min_sentence1_length': 10, 'average_sentence1_length': 103.0, 'max_sentence1_length': 433, 'unique_sentence1': 922, 'min_sentence2_length': 7, 'average_sentence2_length': 103.46, 'max_sentence2_length': 444, 'unique_sentence2': 918}, 'en-ja': {'num_samples': 871, 'number_of_characters': 132742, 'unique_pairs': 867, 'min_sentence1_length': 10, 'average_sentence1_length': 109.81, 'max_sentence1_length': 462, 'unique_sentence1': 864, 'min_sentence2_length': 5, 'average_sentence2_length': 42.59, 'max_sentence2_length': 225, 'unique_sentence2': 866}, 'en-ko': {'num_samples': 879, 'number_of_characters': 142659, 'unique_pairs': 874, 'min_sentence1_length': 10, 'average_sentence1_length': 107.74, 'max_sentence1_length': 462, 'unique_sentence1': 872, 'min_sentence2_length': 3, 'average_sentence2_length': 54.56, 'max_sentence2_length': 250, 'unique_sentence2': 872}, 'en-nl': {'num_samples': 1003, 'number_of_characters': 189637, 'unique_pairs': 1000, 'min_sentence1_length': 10, 'average_sentence1_length': 95.27, 'max_sentence1_length': 433, 'unique_sentence1': 996, 'min_sentence2_length': 4, 'average_sentence2_length': 93.8, 'max_sentence2_length': 477, 'unique_sentence2': 1000}, 'en-ro': {'num_samples': 914, 'number_of_characters': 194128, 'unique_pairs': 910, 'min_sentence1_length': 10, 'average_sentence1_length': 104.72, 'max_sentence1_length': 433, 'unique_sentence1': 907, 'min_sentence2_length': 9, 'average_sentence2_length': 107.67, 'max_sentence2_length': 448, 'unique_sentence2': 910}, 'en-zh': {'num_samples': 879, 'number_of_characters': 131126, 'unique_pairs': 877, 'min_sentence1_length': 10, 'average_sentence1_length': 109.37, 'max_sentence1_length': 462, 'unique_sentence1': 872, 'min_sentence2_length': 2, 'average_sentence2_length': 39.81, 'max_sentence2_length': 230, 'unique_sentence2': 867}, 'fr-en': {'num_samples': 890, 'number_of_characters': 197619, 'unique_pairs': 883, 'min_sentence1_length': 6, 'average_sentence1_length': 113.63, 'max_sentence1_length': 493, 'unique_sentence1': 881, 'min_sentence2_length': 10, 'average_sentence2_length': 108.41, 'max_sentence2_length': 462, 'unique_sentence2': 883}, 'it-en': {'num_samples': 929, 'number_of_characters': 191803, 'unique_pairs': 924, 'min_sentence1_length': 7, 'average_sentence1_length': 103.46, 'max_sentence1_length': 444, 'unique_sentence1': 918, 'min_sentence2_length': 10, 'average_sentence2_length': 103.0, 'max_sentence2_length': 433, 'unique_sentence2': 922}, 'it-nl': {'num_samples': 1001, 'number_of_characters': 188858, 'unique_pairs': 998, 'min_sentence1_length': 7, 'average_sentence1_length': 94.64, 'max_sentence1_length': 459, 'unique_sentence1': 994, 'min_sentence2_length': 7, 'average_sentence2_length': 94.03, 'max_sentence2_length': 505, 'unique_sentence2': 998}, 'it-ro': {'num_samples': 914, 'number_of_characters': 193339, 'unique_pairs': 911, 'min_sentence1_length': 7, 'average_sentence1_length': 103.91, 'max_sentence1_length': 435, 'unique_sentence1': 907, 'min_sentence2_length': 9, 'average_sentence2_length': 107.62, 'max_sentence2_length': 448, 'unique_sentence2': 910}, 'ja-en': {'num_samples': 871, 'number_of_characters': 132742, 'unique_pairs': 867, 'min_sentence1_length': 5, 'average_sentence1_length': 42.59, 'max_sentence1_length': 225, 'unique_sentence1': 866, 'min_sentence2_length': 10, 'average_sentence2_length': 109.81, 'max_sentence2_length': 462, 'unique_sentence2': 864}, 'ko-en': {'num_samples': 879, 'number_of_characters': 142659, 'unique_pairs': 874, 'min_sentence1_length': 3, 'average_sentence1_length': 54.56, 'max_sentence1_length': 250, 'unique_sentence1': 872, 'min_sentence2_length': 10, 'average_sentence2_length': 107.74, 'max_sentence2_length': 462, 'unique_sentence2': 872}, 'nl-en': {'num_samples': 1003, 'number_of_characters': 189637, 'unique_pairs': 1000, 'min_sentence1_length': 4, 'average_sentence1_length': 93.8, 'max_sentence1_length': 477, 'unique_sentence1': 1000, 'min_sentence2_length': 10, 'average_sentence2_length': 95.27, 'max_sentence2_length': 433, 'unique_sentence2': 996}, 'nl-it': {'num_samples': 1001, 'number_of_characters': 188858, 'unique_pairs': 998, 'min_sentence1_length': 7, 'average_sentence1_length': 94.03, 'max_sentence1_length': 505, 'unique_sentence1': 998, 'min_sentence2_length': 7, 'average_sentence2_length': 94.64, 'max_sentence2_length': 459, 'unique_sentence2': 994}, 'nl-ro': {'num_samples': 913, 'number_of_characters': 191376, 'unique_pairs': 911, 'min_sentence1_length': 7, 'average_sentence1_length': 102.02, 'max_sentence1_length': 478, 'unique_sentence1': 909, 'min_sentence2_length': 9, 'average_sentence2_length': 107.59, 'max_sentence2_length': 515, 'unique_sentence2': 909}, 'ro-en': {'num_samples': 914, 'number_of_characters': 194128, 'unique_pairs': 910, 'min_sentence1_length': 9, 'average_sentence1_length': 107.67, 'max_sentence1_length': 448, 'unique_sentence1': 910, 'min_sentence2_length': 10, 'average_sentence2_length': 104.72, 'max_sentence2_length': 433, 'unique_sentence2': 907}, 'ro-it': {'num_samples': 914, 'number_of_characters': 193339, 'unique_pairs': 911, 'min_sentence1_length': 9, 'average_sentence1_length': 107.62, 'max_sentence1_length': 448, 'unique_sentence1': 910, 'min_sentence2_length': 7, 'average_sentence2_length': 103.91, 'max_sentence2_length': 435, 'unique_sentence2': 907}, 'ro-nl': {'num_samples': 913, 'number_of_characters': 191376, 'unique_pairs': 911, 'min_sentence1_length': 9, 'average_sentence1_length': 107.59, 'max_sentence1_length': 515, 'unique_sentence1': 909, 'min_sentence2_length': 7, 'average_sentence2_length': 102.02, 'max_sentence2_length': 478, 'unique_sentence2': 909}, 'zh-en': {'num_samples': 879, 'number_of_characters': 131126, 'unique_pairs': 877, 'min_sentence1_length': 2, 'average_sentence1_length': 39.81, 'max_sentence1_length': 230, 'unique_sentence1': 867, 'min_sentence2_length': 10, 'average_sentence2_length': 109.37, 'max_sentence2_length': 462, 'unique_sentence2': 872}}}}
ImageCoDeT2IMultiChoice (Krojer et al., 2022) ['eng'] Compositionality it2i [Web, Written] {'test': 25322} {'test': {'number_of_characters': 236457, 'num_samples': 25322, 'num_queries': 2302, 'num_documents': 23020, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 256, 'average_document_image_width': 256.0, 'max_document_image_width': 256, 'min_document_image_height': 256, 'average_document_image_height': 256.0, 'max_document_image_height': 256, 'num_document_images': 23020, 'min_query_length': 1, 'average_query_length': 102.72, 'max_query_length': 350, 'unique_queries': 2302, 'num_query_images': 0, 'min_query_image_width': 0, 'average_query_image_width': 0, 'max_query_image_width': 0, 'min_query_image_height': 0, 'average_query_image_height': 0, 'max_query_image_height': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10390}}
ImageCoDeT2IRetrieval (Krojer et al., 2022) ['eng'] Any2AnyRetrieval t2i [Web, Written] {'test': 25322} {'test': {'number_of_characters': 236457, 'num_samples': 25322, 'num_queries': 2302, 'num_documents': 23020, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 23020, 'min_query_length': 1, 'average_query_length': 102.72, 'max_query_length': 350, 'unique_queries': 2302, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2302}}
ImageNet10Clustering (Deng et al., 2009) ['eng'] ImageClustering i2t [Web] {'test': 13000} {'test': {'num_samples': 13000, 'unique_num_labels': 10, 'min_image_width': 224, 'average_image_width': 224.0, 'max_image_width': 224, 'min_image_height': 224, 'average_image_height': 224.0, 'max_image_height': 224, 'labels': {'0': {'count': 1300}, '1': {'count': 1300}, '2': {'count': 1300}, '3': {'count': 1300}, '4': {'count': 1300}, '5': {'count': 1300}, '6': {'count': 1300}, '7': {'count': 1300}, '8': {'count': 1300}, '10': {'count': 1300}}}}
ImageNetDog15Clustering (Deng et al., 2009) ['eng'] ImageClustering i2i [Web] {'test': 1076} {'test': {'num_samples': 1076, 'unique_num_labels': 15, 'min_image_width': 224, 'average_image_width': 224.0, 'max_image_width': 224, 'min_image_height': 224, 'average_image_height': 224.0, 'max_image_height': 224, 'labels': {'0': {'count': 152}, '1': {'count': 88}, '2': {'count': 75}, '3': {'count': 96}, '4': {'count': 57}, '5': {'count': 50}, '6': {'count': 52}, '7': {'count': 50}, '8': {'count': 50}, '9': {'count': 50}, '10': {'count': 53}, '11': {'count': 57}, '12': {'count': 50}, '13': {'count': 100}, '14': {'count': 96}}}}
Imagenet1k (Deng et al., 2009) ['eng'] ImageClassification i2i [Scene] {'test': 50000} {'test': {'num_samples': 50000, 'unique_num_labels': 1000, 'min_image_width': 54, 'average_image_width': 490.4, 'max_image_width': 4288, 'min_image_height': 56, 'average_image_height': 430.24, 'max_image_height': 5005, 'labels': {'0': {'count': 50}, '1': {'count': 50}, '2': {'count': 50}, '3': {'count': 50}, '4': {'count': 50}, '5': {'count': 50}, '6': {'count': 50}, '7': {'count': 50}, '8': {'count': 50}, '9': {'count': 50}, '10': {'count': 50}, '11': {'count': 50}, '12': {'count': 50}, '13': {'count': 50}, '14': {'count': 50}, '15': {'count': 50}, '16': {'count': 50}, '17': {'count': 50}, '18': {'count': 50}, '19': {'count': 50}, '20': {'count': 50}, '21': {'count': 50}, '22': {'count': 50}, '23': {'count': 50}, '24': {'count': 50}, '25': {'count': 50}, '26': {'count': 50}, '27': {'count': 50}, '28': {'count': 50}, '29': {'count': 50}, '30': {'count': 50}, '31': {'count': 50}, '32': {'count': 50}, '33': {'count': 50}, '34': {'count': 50}, '35': {'count': 50}, '36': {'count': 50}, '37': {'count': 50}, '38': {'count': 50}, '39': {'count': 50}, '40': {'count': 50}, '41': {'count': 50}, '42': {'count': 50}, '43': {'count': 50}, '44': {'count': 50}, '45': {'count': 50}, '46': {'count': 50}, '47': {'count': 50}, '48': {'count': 50}, '49': {'count': 50}, '50': {'count': 50}, '51': {'count': 50}, '52': {'count': 50}, '53': {'count': 50}, '54': {'count': 50}, '55': {'count': 50}, '56': {'count': 50}, '57': {'count': 50}, '58': {'count': 50}, '59': {'count': 50}, '60': {'count': 50}, '61': {'count': 50}, '62': {'count': 50}, '63': {'count': 50}, '64': {'count': 50}, '65': {'count': 50}, '66': {'count': 50}, '67': {'count': 50}, '68': {'count': 50}, '69': {'count': 50}, '70': {'count': 50}, '71': {'count': 50}, '72': {'count': 50}, '73': {'count': 50}, '74': {'count': 50}, '75': {'count': 50}, '76': {'count': 50}, '77': {'count': 50}, '78': {'count': 50}, '79': {'count': 50}, '80': {'count': 50}, '81': {'count': 50}, '82': {'count': 50}, '83': {'count': 50}, '84': {'count': 50}, '85': {'count': 50}, '86': {'count': 50}, '87': {'count': 50}, '88': {'count': 50}, '89': {'count': 50}, '90': {'count': 50}, '91': {'count': 50}, '92': {'count': 50}, '93': {'count': 50}, '94': {'count': 50}, '95': {'count': 50}, '96': {'count': 50}, '97': {'count': 50}, '98': {'count': 50}, '99': {'count': 50}, '100': {'count': 50}, '101': {'count': 50}, '102': {'count': 50}, '103': {'count': 50}, '104': {'count': 50}, '105': {'count': 50}, '106': {'count': 50}, '107': {'count': 50}, '108': {'count': 50}, '109': {'count': 50}, '110': {'count': 50}, '111': {'count': 50}, '112': {'count': 50}, '113': {'count': 50}, '114': {'count': 50}, '115': {'count': 50}, '116': {'count': 50}, '117': {'count': 50}, '118': {'count': 50}, '119': {'count': 50}, '120': {'count': 50}, '121': {'count': 50}, '122': {'count': 50}, '123': {'count': 50}, '124': {'count': 50}, '125': {'count': 50}, '126': {'count': 50}, '127': {'count': 50}, '128': {'count': 50}, '129': {'count': 50}, '130': {'count': 50}, '131': {'count': 50}, '132': {'count': 50}, '133': {'count': 50}, '134': {'count': 50}, '135': {'count': 50}, '136': {'count': 50}, '137': {'count': 50}, '138': {'count': 50}, '139': {'count': 50}, '140': {'count': 50}, '141': {'count': 50}, '142': {'count': 50}, '143': {'count': 50}, '144': {'count': 50}, '145': {'count': 50}, '146': {'count': 50}, '147': {'count': 50}, '148': {'count': 50}, '149': {'count': 50}, '150': {'count': 50}, '151': {'count': 50}, '152': {'count': 50}, '153': {'count': 50}, '154': {'count': 50}, '155': {'count': 50}, '156': {'count': 50}, '157': {'count': 50}, '158': {'count': 50}, '159': {'count': 50}, '160': {'count': 50}, '161': {'count': 50}, '162': {'count': 50}, '163': {'count': 50}, '164': {'count': 50}, '165': {'count': 50}, '166': {'count': 50}, '167': {'count': 50}, '168': {'count': 50}, '169': {'count': 50}, '170': {'count': 50}, '171': {'count': 50}, '172': {'count': 50}, '173': {'count': 50}, '174': {'count': 50}, '175': {'count': 50}, '176': {'count': 50}, '177': {'count': 50}, '178': {'count': 50}, '179': {'count': 50}, '180': {'count': 50}, '181': {'count': 50}, '182': {'count': 50}, '183': {'count': 50}, '184': {'count': 50}, '185': {'count': 50}, '186': {'count': 50}, '187': {'count': 50}, '188': {'count': 50}, '189': {'count': 50}, '190': {'count': 50}, '191': {'count': 50}, '192': {'count': 50}, '193': {'count': 50}, '194': {'count': 50}, '195': {'count': 50}, '196': {'count': 50}, '197': {'count': 50}, '198': {'count': 50}, '199': {'count': 50}, '200': {'count': 50}, '201': {'count': 50}, '202': {'count': 50}, '203': {'count': 50}, '204': {'count': 50}, '205': {'count': 50}, '206': {'count': 50}, '207': {'count': 50}, '208': {'count': 50}, '209': {'count': 50}, '210': {'count': 50}, '211': {'count': 50}, '212': {'count': 50}, '213': {'count': 50}, '214': {'count': 50}, '215': {'count': 50}, '216': {'count': 50}, '217': {'count': 50}, '218': {'count': 50}, '219': {'count': 50}, '220': {'count': 50}, '221': {'count': 50}, '222': {'count': 50}, '223': {'count': 50}, '224': {'count': 50}, '225': {'count': 50}, '226': {'count': 50}, '227': {'count': 50}, '228': {'count': 50}, '229': {'count': 50}, '230': {'count': 50}, '231': {'count': 50}, '232': {'count': 50}, '233': {'count': 50}, '234': {'count': 50}, '235': {'count': 50}, '236': {'count': 50}, '237': {'count': 50}, '238': {'count': 50}, '239': {'count': 50}, '240': {'count': 50}, '241': {'count': 50}, '242': {'count': 50}, '243': {'count': 50}, '244': {'count': 50}, '245': {'count': 50}, '246': {'count': 50}, '247': {'count': 50}, '248': {'count': 50}, '249': {'count': 50}, '250': {'count': 50}, '251': {'count': 50}, '252': {'count': 50}, '253': {'count': 50}, '254': {'count': 50}, '255': {'count': 50}, '256': {'count': 50}, '257': {'count': 50}, '258': {'count': 50}, '259': {'count': 50}, '260': {'count': 50}, '261': {'count': 50}, '262': {'count': 50}, '263': {'count': 50}, '264': {'count': 50}, '265': {'count': 50}, '266': {'count': 50}, '267': {'count': 50}, '268': {'count': 50}, '269': {'count': 50}, '270': {'count': 50}, '271': {'count': 50}, '272': {'count': 50}, '273': {'count': 50}, '274': {'count': 50}, '275': {'count': 50}, '276': {'count': 50}, '277': {'count': 50}, '278': {'count': 50}, '279': {'count': 50}, '280': {'count': 50}, '281': {'count': 50}, '282': {'count': 50}, '283': {'count': 50}, '284': {'count': 50}, '285': {'count': 50}, '286': {'count': 50}, '287': {'count': 50}, '288': {'count': 50}, '289': {'count': 50}, '290': {'count': 50}, '291': {'count': 50}, '292': {'count': 50}, '293': {'count': 50}, '294': {'count': 50}, '295': {'count': 50}, '296': {'count': 50}, '297': {'count': 50}, '298': {'count': 50}, '299': {'count': 50}, '300': {'count': 50}, '301': {'count': 50}, '302': {'count': 50}, '303': {'count': 50}, '304': {'count': 50}, '305': {'count': 50}, '306': {'count': 50}, '307': {'count': 50}, '308': {'count': 50}, '309': {'count': 50}, '310': {'count': 50}, '311': {'count': 50}, '312': {'count': 50}, '313': {'count': 50}, '314': {'count': 50}, '315': {'count': 50}, '316': {'count': 50}, '317': {'count': 50}, '318': {'count': 50}, '319': {'count': 50}, '320': {'count': 50}, '321': {'count': 50}, '322': {'count': 50}, '323': {'count': 50}, '324': {'count': 50}, '325': {'count': 50}, '326': {'count': 50}, '327': {'count': 50}, '328': {'count': 50}, '329': {'count': 50}, '330': {'count': 50}, '331': {'count': 50}, '332': {'count': 50}, '333': {'count': 50}, '334': {'count': 50}, '335': {'count': 50}, '336': {'count': 50}, '337': {'count': 50}, '338': {'count': 50}, '339': {'count': 50}, '340': {'count': 50}, '341': {'count': 50}, '342': {'count': 50}, '343': {'count': 50}, '344': {'count': 50}, '345': {'count': 50}, '346': {'count': 50}, '347': {'count': 50}, '348': {'count': 50}, '349': {'count': 50}, '350': {'count': 50}, '351': {'count': 50}, '352': {'count': 50}, '353': {'count': 50}, '354': {'count': 50}, '355': {'count': 50}, '356': {'count': 50}, '357': {'count': 50}, '358': {'count': 50}, '359': {'count': 50}, '360': {'count': 50}, '361': {'count': 50}, '362': {'count': 50}, '363': {'count': 50}, '364': {'count': 50}, '365': {'count': 50}, '366': {'count': 50}, '367': {'count': 50}, '368': {'count': 50}, '369': {'count': 50}, '370': {'count': 50}, '371': {'count': 50}, '372': {'count': 50}, '373': {'count': 50}, '374': {'count': 50}, '375': {'count': 50}, '376': {'count': 50}, '377': {'count': 50}, '378': {'count': 50}, '379': {'count': 50}, '380': {'count': 50}, '381': {'count': 50}, '382': {'count': 50}, '383': {'count': 50}, '384': {'count': 50}, '385': {'count': 50}, '386': {'count': 50}, '387': {'count': 50}, '388': {'count': 50}, '389': {'count': 50}, '390': {'count': 50}, '391': {'count': 50}, '392': {'count': 50}, '393': {'count': 50}, '394': {'count': 50}, '395': {'count': 50}, '396': {'count': 50}, '397': {'count': 50}, '398': {'count': 50}, '399': {'count': 50}, '400': {'count': 50}, '401': {'count': 50}, '402': {'count': 50}, '403': {'count': 50}, '404': {'count': 50}, '405': {'count': 50}, '406': {'count': 50}, '407': {'count': 50}, '408': {'count': 50}, '409': {'count': 50}, '410': {'count': 50}, '411': {'count': 50}, '412': {'count': 50}, '413': {'count': 50}, '414': {'count': 50}, '415': {'count': 50}, '416': {'count': 50}, '417': {'count': 50}, '418': {'count': 50}, '419': {'count': 50}, '420': {'count': 50}, '421': {'count': 50}, '422': {'count': 50}, '423': {'count': 50}, '424': {'count': 50}, '425': {'count': 50}, '426': {'count': 50}, '427': {'count': 50}, '428': {'count': 50}, '429': {'count': 50}, '430': {'count': 50}, '431': {'count': 50}, '432': {'count': 50}, '433': {'count': 50}, '434': {'count': 50}, '435': {'count': 50}, '436': {'count': 50}, '437': {'count': 50}, '438': {'count': 50}, '439': {'count': 50}, '440': {'count': 50}, '441': {'count': 50}, '442': {'count': 50}, '443': {'count': 50}, '444': {'count': 50}, '445': {'count': 50}, '446': {'count': 50}, '447': {'count': 50}, '448': {'count': 50}, '449': {'count': 50}, '450': {'count': 50}, '451': {'count': 50}, '452': {'count': 50}, '453': {'count': 50}, '454': {'count': 50}, '455': {'count': 50}, '456': {'count': 50}, '457': {'count': 50}, '458': {'count': 50}, '459': {'count': 50}, '460': {'count': 50}, '461': {'count': 50}, '462': {'count': 50}, '463': {'count': 50}, '464': {'count': 50}, '465': {'count': 50}, '466': {'count': 50}, '467': {'count': 50}, '468': {'count': 50}, '469': {'count': 50}, '470': {'count': 50}, '471': {'count': 50}, '472': {'count': 50}, '473': {'count': 50}, '474': {'count': 50}, '475': {'count': 50}, '476': {'count': 50}, '477': {'count': 50}, '478': {'count': 50}, '479': {'count': 50}, '480': {'count': 50}, '481': {'count': 50}, '482': {'count': 50}, '483': {'count': 50}, '484': {'count': 50}, '485': {'count': 50}, '486': {'count': 50}, '487': {'count': 50}, '488': {'count': 50}, '489': {'count': 50}, '490': {'count': 50}, '491': {'count': 50}, '492': {'count': 50}, '493': {'count': 50}, '494': {'count': 50}, '495': {'count': 50}, '496': {'count': 50}, '497': {'count': 50}, '498': {'count': 50}, '499': {'count': 50}, '500': {'count': 50}, '501': {'count': 50}, '502': {'count': 50}, '503': {'count': 50}, '504': {'count': 50}, '505': {'count': 50}, '506': {'count': 50}, '507': {'count': 50}, '508': {'count': 50}, '509': {'count': 50}, '510': {'count': 50}, '511': {'count': 50}, '512': {'count': 50}, '513': {'count': 50}, '514': {'count': 50}, '515': {'count': 50}, '516': {'count': 50}, '517': {'count': 50}, '518': {'count': 50}, '519': {'count': 50}, '520': {'count': 50}, '521': {'count': 50}, '522': {'count': 50}, '523': {'count': 50}, '524': {'count': 50}, '525': {'count': 50}, '526': {'count': 50}, '527': {'count': 50}, '528': {'count': 50}, '529': {'count': 50}, '530': {'count': 50}, '531': {'count': 50}, '532': {'count': 50}, '533': {'count': 50}, '534': {'count': 50}, '535': {'count': 50}, '536': {'count': 50}, '537': {'count': 50}, '538': {'count': 50}, '539': {'count': 50}, '540': {'count': 50}, '541': {'count': 50}, '542': {'count': 50}, '543': {'count': 50}, '544': {'count': 50}, '545': {'count': 50}, '546': {'count': 50}, '547': {'count': 50}, '548': {'count': 50}, '549': {'count': 50}, '550': {'count': 50}, '551': {'count': 50}, '552': {'count': 50}, '553': {'count': 50}, '554': {'count': 50}, '555': {'count': 50}, '556': {'count': 50}, '557': {'count': 50}, '558': {'count': 50}, '559': {'count': 50}, '560': {'count': 50}, '561': {'count': 50}, '562': {'count': 50}, '563': {'count': 50}, '564': {'count': 50}, '565': {'count': 50}, '566': {'count': 50}, '567': {'count': 50}, '568': {'count': 50}, '569': {'count': 50}, '570': {'count': 50}, '571': {'count': 50}, '572': {'count': 50}, '573': {'count': 50}, '574': {'count': 50}, '575': {'count': 50}, '576': {'count': 50}, '577': {'count': 50}, '578': {'count': 50}, '579': {'count': 50}, '580': {'count': 50}, '581': {'count': 50}, '582': {'count': 50}, '583': {'count': 50}, '584': {'count': 50}, '585': {'count': 50}, '586': {'count': 50}, '587': {'count': 50}, '588': {'count': 50}, '589': {'count': 50}, '590': {'count': 50}, '591': {'count': 50}, '592': {'count': 50}, '593': {'count': 50}, '594': {'count': 50}, '595': {'count': 50}, '596': {'count': 50}, '597': {'count': 50}, '598': {'count': 50}, '599': {'count': 50}, '600': {'count': 50}, '601': {'count': 50}, '602': {'count': 50}, '603': {'count': 50}, '604': {'count': 50}, '605': {'count': 50}, '606': {'count': 50}, '607': {'count': 50}, '608': {'count': 50}, '609': {'count': 50}, '610': {'count': 50}, '611': {'count': 50}, '612': {'count': 50}, '613': {'count': 50}, '614': {'count': 50}, '615': {'count': 50}, '616': {'count': 50}, '617': {'count': 50}, '618': {'count': 50}, '619': {'count': 50}, '620': {'count': 50}, '621': {'count': 50}, '622': {'count': 50}, '623': {'count': 50}, '624': {'count': 50}, '625': {'count': 50}, '626': {'count': 50}, '627': {'count': 50}, '628': {'count': 50}, '629': {'count': 50}, '630': {'count': 50}, '631': {'count': 50}, '632': {'count': 50}, '633': {'count': 50}, '634': {'count': 50}, '635': {'count': 50}, '636': {'count': 50}, '637': {'count': 50}, '638': {'count': 50}, '639': {'count': 50}, '640': {'count': 50}, '641': {'count': 50}, '642': {'count': 50}, '643': {'count': 50}, '644': {'count': 50}, '645': {'count': 50}, '646': {'count': 50}, '647': {'count': 50}, '648': {'count': 50}, '649': {'count': 50}, '650': {'count': 50}, '651': {'count': 50}, '652': {'count': 50}, '653': {'count': 50}, '654': {'count': 50}, '655': {'count': 50}, '656': {'count': 50}, '657': {'count': 50}, '658': {'count': 50}, '659': {'count': 50}, '660': {'count': 50}, '661': {'count': 50}, '662': {'count': 50}, '663': {'count': 50}, '664': {'count': 50}, '665': {'count': 50}, '666': {'count': 50}, '667': {'count': 50}, '668': {'count': 50}, '669': {'count': 50}, '670': {'count': 50}, '671': {'count': 50}, '672': {'count': 50}, '673': {'count': 50}, '674': {'count': 50}, '675': {'count': 50}, '676': {'count': 50}, '677': {'count': 50}, '678': {'count': 50}, '679': {'count': 50}, '680': {'count': 50}, '681': {'count': 50}, '682': {'count': 50}, '683': {'count': 50}, '684': {'count': 50}, '685': {'count': 50}, '686': {'count': 50}, '687': {'count': 50}, '688': {'count': 50}, '689': {'count': 50}, '690': {'count': 50}, '691': {'count': 50}, '692': {'count': 50}, '693': {'count': 50}, '694': {'count': 50}, '695': {'count': 50}, '696': {'count': 50}, '697': {'count': 50}, '698': {'count': 50}, '699': {'count': 50}, '700': {'count': 50}, '701': {'count': 50}, '702': {'count': 50}, '703': {'count': 50}, '704': {'count': 50}, '705': {'count': 50}, '706': {'count': 50}, '707': {'count': 50}, '708': {'count': 50}, '709': {'count': 50}, '710': {'count': 50}, '711': {'count': 50}, '712': {'count': 50}, '713': {'count': 50}, '714': {'count': 50}, '715': {'count': 50}, '716': {'count': 50}, '717': {'count': 50}, '718': {'count': 50}, '719': {'count': 50}, '720': {'count': 50}, '721': {'count': 50}, '722': {'count': 50}, '723': {'count': 50}, '724': {'count': 50}, '725': {'count': 50}, '726': {'count': 50}, '727': {'count': 50}, '728': {'count': 50}, '729': {'count': 50}, '730': {'count': 50}, '731': {'count': 50}, '732': {'count': 50}, '733': {'count': 50}, '734': {'count': 50}, '735': {'count': 50}, '736': {'count': 50}, '737': {'count': 50}, '738': {'count': 50}, '739': {'count': 50}, '740': {'count': 50}, '741': {'count': 50}, '742': {'count': 50}, '743': {'count': 50}, '744': {'count': 50}, '745': {'count': 50}, '746': {'count': 50}, '747': {'count': 50}, '748': {'count': 50}, '749': {'count': 50}, '750': {'count': 50}, '751': {'count': 50}, '752': {'count': 50}, '753': {'count': 50}, '754': {'count': 50}, '755': {'count': 50}, '756': {'count': 50}, '757': {'count': 50}, '758': {'count': 50}, '759': {'count': 50}, '760': {'count': 50}, '761': {'count': 50}, '762': {'count': 50}, '763': {'count': 50}, '764': {'count': 50}, '765': {'count': 50}, '766': {'count': 50}, '767': {'count': 50}, '768': {'count': 50}, '769': {'count': 50}, '770': {'count': 50}, '771': {'count': 50}, '772': {'count': 50}, '773': {'count': 50}, '774': {'count': 50}, '775': {'count': 50}, '776': {'count': 50}, '777': {'count': 50}, '778': {'count': 50}, '779': {'count': 50}, '780': {'count': 50}, '781': {'count': 50}, '782': {'count': 50}, '783': {'count': 50}, '784': {'count': 50}, '785': {'count': 50}, '786': {'count': 50}, '787': {'count': 50}, '788': {'count': 50}, '789': {'count': 50}, '790': {'count': 50}, '791': {'count': 50}, '792': {'count': 50}, '793': {'count': 50}, '794': {'count': 50}, '795': {'count': 50}, '796': {'count': 50}, '797': {'count': 50}, '798': {'count': 50}, '799': {'count': 50}, '800': {'count': 50}, '801': {'count': 50}, '802': {'count': 50}, '803': {'count': 50}, '804': {'count': 50}, '805': {'count': 50}, '806': {'count': 50}, '807': {'count': 50}, '808': {'count': 50}, '809': {'count': 50}, '810': {'count': 50}, '811': {'count': 50}, '812': {'count': 50}, '813': {'count': 50}, '814': {'count': 50}, '815': {'count': 50}, '816': {'count': 50}, '817': {'count': 50}, '818': {'count': 50}, '819': {'count': 50}, '820': {'count': 50}, '821': {'count': 50}, '822': {'count': 50}, '823': {'count': 50}, '824': {'count': 50}, '825': {'count': 50}, '826': {'count': 50}, '827': {'count': 50}, '828': {'count': 50}, '829': {'count': 50}, '830': {'count': 50}, '831': {'count': 50}, '832': {'count': 50}, '833': {'count': 50}, '834': {'count': 50}, '835': {'count': 50}, '836': {'count': 50}, '837': {'count': 50}, '838': {'count': 50}, '839': {'count': 50}, '840': {'count': 50}, '841': {'count': 50}, '842': {'count': 50}, '843': {'count': 50}, '844': {'count': 50}, '845': {'count': 50}, '846': {'count': 50}, '847': {'count': 50}, '848': {'count': 50}, '849': {'count': 50}, '850': {'count': 50}, '851': {'count': 50}, '852': {'count': 50}, '853': {'count': 50}, '854': {'count': 50}, '855': {'count': 50}, '856': {'count': 50}, '857': {'count': 50}, '858': {'count': 50}, '859': {'count': 50}, '860': {'count': 50}, '861': {'count': 50}, '862': {'count': 50}, '863': {'count': 50}, '864': {'count': 50}, '865': {'count': 50}, '866': {'count': 50}, '867': {'count': 50}, '868': {'count': 50}, '869': {'count': 50}, '870': {'count': 50}, '871': {'count': 50}, '872': {'count': 50}, '873': {'count': 50}, '874': {'count': 50}, '875': {'count': 50}, '876': {'count': 50}, '877': {'count': 50}, '878': {'count': 50}, '879': {'count': 50}, '880': {'count': 50}, '881': {'count': 50}, '882': {'count': 50}, '883': {'count': 50}, '884': {'count': 50}, '885': {'count': 50}, '886': {'count': 50}, '887': {'count': 50}, '888': {'count': 50}, '889': {'count': 50}, '890': {'count': 50}, '891': {'count': 50}, '892': {'count': 50}, '893': {'count': 50}, '894': {'count': 50}, '895': {'count': 50}, '896': {'count': 50}, '897': {'count': 50}, '898': {'count': 50}, '899': {'count': 50}, '900': {'count': 50}, '901': {'count': 50}, '902': {'count': 50}, '903': {'count': 50}, '904': {'count': 50}, '905': {'count': 50}, '906': {'count': 50}, '907': {'count': 50}, '908': {'count': 50}, '909': {'count': 50}, '910': {'count': 50}, '911': {'count': 50}, '912': {'count': 50}, '913': {'count': 50}, '914': {'count': 50}, '915': {'count': 50}, '916': {'count': 50}, '917': {'count': 50}, '918': {'count': 50}, '919': {'count': 50}, '920': {'count': 50}, '921': {'count': 50}, '922': {'count': 50}, '923': {'count': 50}, '924': {'count': 50}, '925': {'count': 50}, '926': {'count': 50}, '927': {'count': 50}, '928': {'count': 50}, '929': {'count': 50}, '930': {'count': 50}, '931': {'count': 50}, '932': {'count': 50}, '933': {'count': 50}, '934': {'count': 50}, '935': {'count': 50}, '936': {'count': 50}, '937': {'count': 50}, '938': {'count': 50}, '939': {'count': 50}, '940': {'count': 50}, '941': {'count': 50}, '942': {'count': 50}, '943': {'count': 50}, '944': {'count': 50}, '945': {'count': 50}, '946': {'count': 50}, '947': {'count': 50}, '948': {'count': 50}, '949': {'count': 50}, '950': {'count': 50}, '951': {'count': 50}, '952': {'count': 50}, '953': {'count': 50}, '954': {'count': 50}, '955': {'count': 50}, '956': {'count': 50}, '957': {'count': 50}, '958': {'count': 50}, '959': {'count': 50}, '960': {'count': 50}, '961': {'count': 50}, '962': {'count': 50}, '963': {'count': 50}, '964': {'count': 50}, '965': {'count': 50}, '966': {'count': 50}, '967': {'count': 50}, '968': {'count': 50}, '969': {'count': 50}, '970': {'count': 50}, '971': {'count': 50}, '972': {'count': 50}, '973': {'count': 50}, '974': {'count': 50}, '975': {'count': 50}, '976': {'count': 50}, '977': {'count': 50}, '978': {'count': 50}, '979': {'count': 50}, '980': {'count': 50}, '981': {'count': 50}, '982': {'count': 50}, '983': {'count': 50}, '984': {'count': 50}, '985': {'count': 50}, '986': {'count': 50}, '987': {'count': 50}, '988': {'count': 50}, '989': {'count': 50}, '990': {'count': 50}, '991': {'count': 50}, '992': {'count': 50}, '993': {'count': 50}, '994': {'count': 50}, '995': {'count': 50}, '996': {'count': 50}, '997': {'count': 50}, '998': {'count': 50}, '999': {'count': 50}}}}
Imagenet1kZeroShot (Deng et al., 2009) ['eng'] ZeroShotClassification i2t [Scene] {'test': 50000} {'test': {'num_samples': 50000, 'unique_num_labels': 1000, 'min_image_width': 54, 'average_image_width': 490.4, 'max_image_width': 4288, 'min_image_height': 56, 'average_image_height': 430.24, 'max_image_height': 5005, 'min_label_text_length': 15, 'average_label_text_length': 23.81, 'max_label_text_length': 50, 'labels': {'0': {'count': 50}, '1': {'count': 50}, '2': {'count': 50}, '3': {'count': 50}, '4': {'count': 50}, '5': {'count': 50}, '6': {'count': 50}, '7': {'count': 50}, '8': {'count': 50}, '9': {'count': 50}, '10': {'count': 50}, '11': {'count': 50}, '12': {'count': 50}, '13': {'count': 50}, '14': {'count': 50}, '15': {'count': 50}, '16': {'count': 50}, '17': {'count': 50}, '18': {'count': 50}, '19': {'count': 50}, '20': {'count': 50}, '21': {'count': 50}, '22': {'count': 50}, '23': {'count': 50}, '24': {'count': 50}, '25': {'count': 50}, '26': {'count': 50}, '27': {'count': 50}, '28': {'count': 50}, '29': {'count': 50}, '30': {'count': 50}, '31': {'count': 50}, '32': {'count': 50}, '33': {'count': 50}, '34': {'count': 50}, '35': {'count': 50}, '36': {'count': 50}, '37': {'count': 50}, '38': {'count': 50}, '39': {'count': 50}, '40': {'count': 50}, '41': {'count': 50}, '42': {'count': 50}, '43': {'count': 50}, '44': {'count': 50}, '45': {'count': 50}, '46': {'count': 50}, '47': {'count': 50}, '48': {'count': 50}, '49': {'count': 50}, '50': {'count': 50}, '51': {'count': 50}, '52': {'count': 50}, '53': {'count': 50}, '54': {'count': 50}, '55': {'count': 50}, '56': {'count': 50}, '57': {'count': 50}, '58': {'count': 50}, '59': {'count': 50}, '60': {'count': 50}, '61': {'count': 50}, '62': {'count': 50}, '63': {'count': 50}, '64': {'count': 50}, '65': {'count': 50}, '66': {'count': 50}, '67': {'count': 50}, '68': {'count': 50}, '69': {'count': 50}, '70': {'count': 50}, '71': {'count': 50}, '72': {'count': 50}, '73': {'count': 50}, '74': {'count': 50}, '75': {'count': 50}, '76': {'count': 50}, '77': {'count': 50}, '78': {'count': 50}, '79': {'count': 50}, '80': {'count': 50}, '81': {'count': 50}, '82': {'count': 50}, '83': {'count': 50}, '84': {'count': 50}, '85': {'count': 50}, '86': {'count': 50}, '87': {'count': 50}, '88': {'count': 50}, '89': {'count': 50}, '90': {'count': 50}, '91': {'count': 50}, '92': {'count': 50}, '93': {'count': 50}, '94': {'count': 50}, '95': {'count': 50}, '96': {'count': 50}, '97': {'count': 50}, '98': {'count': 50}, '99': {'count': 50}, '100': {'count': 50}, '101': {'count': 50}, '102': {'count': 50}, '103': {'count': 50}, '104': {'count': 50}, '105': {'count': 50}, '106': {'count': 50}, '107': {'count': 50}, '108': {'count': 50}, '109': {'count': 50}, '110': {'count': 50}, '111': {'count': 50}, '112': {'count': 50}, '113': {'count': 50}, '114': {'count': 50}, '115': {'count': 50}, '116': {'count': 50}, '117': {'count': 50}, '118': {'count': 50}, '119': {'count': 50}, '120': {'count': 50}, '121': {'count': 50}, '122': {'count': 50}, '123': {'count': 50}, '124': {'count': 50}, '125': {'count': 50}, '126': {'count': 50}, '127': {'count': 50}, '128': {'count': 50}, '129': {'count': 50}, '130': {'count': 50}, '131': {'count': 50}, '132': {'count': 50}, '133': {'count': 50}, '134': {'count': 50}, '135': {'count': 50}, '136': {'count': 50}, '137': {'count': 50}, '138': {'count': 50}, '139': {'count': 50}, '140': {'count': 50}, '141': {'count': 50}, '142': {'count': 50}, '143': {'count': 50}, '144': {'count': 50}, '145': {'count': 50}, '146': {'count': 50}, '147': {'count': 50}, '148': {'count': 50}, '149': {'count': 50}, '150': {'count': 50}, '151': {'count': 50}, '152': {'count': 50}, '153': {'count': 50}, '154': {'count': 50}, '155': {'count': 50}, '156': {'count': 50}, '157': {'count': 50}, '158': {'count': 50}, '159': {'count': 50}, '160': {'count': 50}, '161': {'count': 50}, '162': {'count': 50}, '163': {'count': 50}, '164': {'count': 50}, '165': {'count': 50}, '166': {'count': 50}, '167': {'count': 50}, '168': {'count': 50}, '169': {'count': 50}, '170': {'count': 50}, '171': {'count': 50}, '172': {'count': 50}, '173': {'count': 50}, '174': {'count': 50}, '175': {'count': 50}, '176': {'count': 50}, '177': {'count': 50}, '178': {'count': 50}, '179': {'count': 50}, '180': {'count': 50}, '181': {'count': 50}, '182': {'count': 50}, '183': {'count': 50}, '184': {'count': 50}, '185': {'count': 50}, '186': {'count': 50}, '187': {'count': 50}, '188': {'count': 50}, '189': {'count': 50}, '190': {'count': 50}, '191': {'count': 50}, '192': {'count': 50}, '193': {'count': 50}, '194': {'count': 50}, '195': {'count': 50}, '196': {'count': 50}, '197': {'count': 50}, '198': {'count': 50}, '199': {'count': 50}, '200': {'count': 50}, '201': {'count': 50}, '202': {'count': 50}, '203': {'count': 50}, '204': {'count': 50}, '205': {'count': 50}, '206': {'count': 50}, '207': {'count': 50}, '208': {'count': 50}, '209': {'count': 50}, '210': {'count': 50}, '211': {'count': 50}, '212': {'count': 50}, '213': {'count': 50}, '214': {'count': 50}, '215': {'count': 50}, '216': {'count': 50}, '217': {'count': 50}, '218': {'count': 50}, '219': {'count': 50}, '220': {'count': 50}, '221': {'count': 50}, '222': {'count': 50}, '223': {'count': 50}, '224': {'count': 50}, '225': {'count': 50}, '226': {'count': 50}, '227': {'count': 50}, '228': {'count': 50}, '229': {'count': 50}, '230': {'count': 50}, '231': {'count': 50}, '232': {'count': 50}, '233': {'count': 50}, '234': {'count': 50}, '235': {'count': 50}, '236': {'count': 50}, '237': {'count': 50}, '238': {'count': 50}, '239': {'count': 50}, '240': {'count': 50}, '241': {'count': 50}, '242': {'count': 50}, '243': {'count': 50}, '244': {'count': 50}, '245': {'count': 50}, '246': {'count': 50}, '247': {'count': 50}, '248': {'count': 50}, '249': {'count': 50}, '250': {'count': 50}, '251': {'count': 50}, '252': {'count': 50}, '253': {'count': 50}, '254': {'count': 50}, '255': {'count': 50}, '256': {'count': 50}, '257': {'count': 50}, '258': {'count': 50}, '259': {'count': 50}, '260': {'count': 50}, '261': {'count': 50}, '262': {'count': 50}, '263': {'count': 50}, '264': {'count': 50}, '265': {'count': 50}, '266': {'count': 50}, '267': {'count': 50}, '268': {'count': 50}, '269': {'count': 50}, '270': {'count': 50}, '271': {'count': 50}, '272': {'count': 50}, '273': {'count': 50}, '274': {'count': 50}, '275': {'count': 50}, '276': {'count': 50}, '277': {'count': 50}, '278': {'count': 50}, '279': {'count': 50}, '280': {'count': 50}, '281': {'count': 50}, '282': {'count': 50}, '283': {'count': 50}, '284': {'count': 50}, '285': {'count': 50}, '286': {'count': 50}, '287': {'count': 50}, '288': {'count': 50}, '289': {'count': 50}, '290': {'count': 50}, '291': {'count': 50}, '292': {'count': 50}, '293': {'count': 50}, '294': {'count': 50}, '295': {'count': 50}, '296': {'count': 50}, '297': {'count': 50}, '298': {'count': 50}, '299': {'count': 50}, '300': {'count': 50}, '301': {'count': 50}, '302': {'count': 50}, '303': {'count': 50}, '304': {'count': 50}, '305': {'count': 50}, '306': {'count': 50}, '307': {'count': 50}, '308': {'count': 50}, '309': {'count': 50}, '310': {'count': 50}, '311': {'count': 50}, '312': {'count': 50}, '313': {'count': 50}, '314': {'count': 50}, '315': {'count': 50}, '316': {'count': 50}, '317': {'count': 50}, '318': {'count': 50}, '319': {'count': 50}, '320': {'count': 50}, '321': {'count': 50}, '322': {'count': 50}, '323': {'count': 50}, '324': {'count': 50}, '325': {'count': 50}, '326': {'count': 50}, '327': {'count': 50}, '328': {'count': 50}, '329': {'count': 50}, '330': {'count': 50}, '331': {'count': 50}, '332': {'count': 50}, '333': {'count': 50}, '334': {'count': 50}, '335': {'count': 50}, '336': {'count': 50}, '337': {'count': 50}, '338': {'count': 50}, '339': {'count': 50}, '340': {'count': 50}, '341': {'count': 50}, '342': {'count': 50}, '343': {'count': 50}, '344': {'count': 50}, '345': {'count': 50}, '346': {'count': 50}, '347': {'count': 50}, '348': {'count': 50}, '349': {'count': 50}, '350': {'count': 50}, '351': {'count': 50}, '352': {'count': 50}, '353': {'count': 50}, '354': {'count': 50}, '355': {'count': 50}, '356': {'count': 50}, '357': {'count': 50}, '358': {'count': 50}, '359': {'count': 50}, '360': {'count': 50}, '361': {'count': 50}, '362': {'count': 50}, '363': {'count': 50}, '364': {'count': 50}, '365': {'count': 50}, '366': {'count': 50}, '367': {'count': 50}, '368': {'count': 50}, '369': {'count': 50}, '370': {'count': 50}, '371': {'count': 50}, '372': {'count': 50}, '373': {'count': 50}, '374': {'count': 50}, '375': {'count': 50}, '376': {'count': 50}, '377': {'count': 50}, '378': {'count': 50}, '379': {'count': 50}, '380': {'count': 50}, '381': {'count': 50}, '382': {'count': 50}, '383': {'count': 50}, '384': {'count': 50}, '385': {'count': 50}, '386': {'count': 50}, '387': {'count': 50}, '388': {'count': 50}, '389': {'count': 50}, '390': {'count': 50}, '391': {'count': 50}, '392': {'count': 50}, '393': {'count': 50}, '394': {'count': 50}, '395': {'count': 50}, '396': {'count': 50}, '397': {'count': 50}, '398': {'count': 50}, '399': {'count': 50}, '400': {'count': 50}, '401': {'count': 50}, '402': {'count': 50}, '403': {'count': 50}, '404': {'count': 50}, '405': {'count': 50}, '406': {'count': 50}, '407': {'count': 50}, '408': {'count': 50}, '409': {'count': 50}, '410': {'count': 50}, '411': {'count': 50}, '412': {'count': 50}, '413': {'count': 50}, '414': {'count': 50}, '415': {'count': 50}, '416': {'count': 50}, '417': {'count': 50}, '418': {'count': 50}, '419': {'count': 50}, '420': {'count': 50}, '421': {'count': 50}, '422': {'count': 50}, '423': {'count': 50}, '424': {'count': 50}, '425': {'count': 50}, '426': {'count': 50}, '427': {'count': 50}, '428': {'count': 50}, '429': {'count': 50}, '430': {'count': 50}, '431': {'count': 50}, '432': {'count': 50}, '433': {'count': 50}, '434': {'count': 50}, '435': {'count': 50}, '436': {'count': 50}, '437': {'count': 50}, '438': {'count': 50}, '439': {'count': 50}, '440': {'count': 50}, '441': {'count': 50}, '442': {'count': 50}, '443': {'count': 50}, '444': {'count': 50}, '445': {'count': 50}, '446': {'count': 50}, '447': {'count': 50}, '448': {'count': 50}, '449': {'count': 50}, '450': {'count': 50}, '451': {'count': 50}, '452': {'count': 50}, '453': {'count': 50}, '454': {'count': 50}, '455': {'count': 50}, '456': {'count': 50}, '457': {'count': 50}, '458': {'count': 50}, '459': {'count': 50}, '460': {'count': 50}, '461': {'count': 50}, '462': {'count': 50}, '463': {'count': 50}, '464': {'count': 50}, '465': {'count': 50}, '466': {'count': 50}, '467': {'count': 50}, '468': {'count': 50}, '469': {'count': 50}, '470': {'count': 50}, '471': {'count': 50}, '472': {'count': 50}, '473': {'count': 50}, '474': {'count': 50}, '475': {'count': 50}, '476': {'count': 50}, '477': {'count': 50}, '478': {'count': 50}, '479': {'count': 50}, '480': {'count': 50}, '481': {'count': 50}, '482': {'count': 50}, '483': {'count': 50}, '484': {'count': 50}, '485': {'count': 50}, '486': {'count': 50}, '487': {'count': 50}, '488': {'count': 50}, '489': {'count': 50}, '490': {'count': 50}, '491': {'count': 50}, '492': {'count': 50}, '493': {'count': 50}, '494': {'count': 50}, '495': {'count': 50}, '496': {'count': 50}, '497': {'count': 50}, '498': {'count': 50}, '499': {'count': 50}, '500': {'count': 50}, '501': {'count': 50}, '502': {'count': 50}, '503': {'count': 50}, '504': {'count': 50}, '505': {'count': 50}, '506': {'count': 50}, '507': {'count': 50}, '508': {'count': 50}, '509': {'count': 50}, '510': {'count': 50}, '511': {'count': 50}, '512': {'count': 50}, '513': {'count': 50}, '514': {'count': 50}, '515': {'count': 50}, '516': {'count': 50}, '517': {'count': 50}, '518': {'count': 50}, '519': {'count': 50}, '520': {'count': 50}, '521': {'count': 50}, '522': {'count': 50}, '523': {'count': 50}, '524': {'count': 50}, '525': {'count': 50}, '526': {'count': 50}, '527': {'count': 50}, '528': {'count': 50}, '529': {'count': 50}, '530': {'count': 50}, '531': {'count': 50}, '532': {'count': 50}, '533': {'count': 50}, '534': {'count': 50}, '535': {'count': 50}, '536': {'count': 50}, '537': {'count': 50}, '538': {'count': 50}, '539': {'count': 50}, '540': {'count': 50}, '541': {'count': 50}, '542': {'count': 50}, '543': {'count': 50}, '544': {'count': 50}, '545': {'count': 50}, '546': {'count': 50}, '547': {'count': 50}, '548': {'count': 50}, '549': {'count': 50}, '550': {'count': 50}, '551': {'count': 50}, '552': {'count': 50}, '553': {'count': 50}, '554': {'count': 50}, '555': {'count': 50}, '556': {'count': 50}, '557': {'count': 50}, '558': {'count': 50}, '559': {'count': 50}, '560': {'count': 50}, '561': {'count': 50}, '562': {'count': 50}, '563': {'count': 50}, '564': {'count': 50}, '565': {'count': 50}, '566': {'count': 50}, '567': {'count': 50}, '568': {'count': 50}, '569': {'count': 50}, '570': {'count': 50}, '571': {'count': 50}, '572': {'count': 50}, '573': {'count': 50}, '574': {'count': 50}, '575': {'count': 50}, '576': {'count': 50}, '577': {'count': 50}, '578': {'count': 50}, '579': {'count': 50}, '580': {'count': 50}, '581': {'count': 50}, '582': {'count': 50}, '583': {'count': 50}, '584': {'count': 50}, '585': {'count': 50}, '586': {'count': 50}, '587': {'count': 50}, '588': {'count': 50}, '589': {'count': 50}, '590': {'count': 50}, '591': {'count': 50}, '592': {'count': 50}, '593': {'count': 50}, '594': {'count': 50}, '595': {'count': 50}, '596': {'count': 50}, '597': {'count': 50}, '598': {'count': 50}, '599': {'count': 50}, '600': {'count': 50}, '601': {'count': 50}, '602': {'count': 50}, '603': {'count': 50}, '604': {'count': 50}, '605': {'count': 50}, '606': {'count': 50}, '607': {'count': 50}, '608': {'count': 50}, '609': {'count': 50}, '610': {'count': 50}, '611': {'count': 50}, '612': {'count': 50}, '613': {'count': 50}, '614': {'count': 50}, '615': {'count': 50}, '616': {'count': 50}, '617': {'count': 50}, '618': {'count': 50}, '619': {'count': 50}, '620': {'count': 50}, '621': {'count': 50}, '622': {'count': 50}, '623': {'count': 50}, '624': {'count': 50}, '625': {'count': 50}, '626': {'count': 50}, '627': {'count': 50}, '628': {'count': 50}, '629': {'count': 50}, '630': {'count': 50}, '631': {'count': 50}, '632': {'count': 50}, '633': {'count': 50}, '634': {'count': 50}, '635': {'count': 50}, '636': {'count': 50}, '637': {'count': 50}, '638': {'count': 50}, '639': {'count': 50}, '640': {'count': 50}, '641': {'count': 50}, '642': {'count': 50}, '643': {'count': 50}, '644': {'count': 50}, '645': {'count': 50}, '646': {'count': 50}, '647': {'count': 50}, '648': {'count': 50}, '649': {'count': 50}, '650': {'count': 50}, '651': {'count': 50}, '652': {'count': 50}, '653': {'count': 50}, '654': {'count': 50}, '655': {'count': 50}, '656': {'count': 50}, '657': {'count': 50}, '658': {'count': 50}, '659': {'count': 50}, '660': {'count': 50}, '661': {'count': 50}, '662': {'count': 50}, '663': {'count': 50}, '664': {'count': 50}, '665': {'count': 50}, '666': {'count': 50}, '667': {'count': 50}, '668': {'count': 50}, '669': {'count': 50}, '670': {'count': 50}, '671': {'count': 50}, '672': {'count': 50}, '673': {'count': 50}, '674': {'count': 50}, '675': {'count': 50}, '676': {'count': 50}, '677': {'count': 50}, '678': {'count': 50}, '679': {'count': 50}, '680': {'count': 50}, '681': {'count': 50}, '682': {'count': 50}, '683': {'count': 50}, '684': {'count': 50}, '685': {'count': 50}, '686': {'count': 50}, '687': {'count': 50}, '688': {'count': 50}, '689': {'count': 50}, '690': {'count': 50}, '691': {'count': 50}, '692': {'count': 50}, '693': {'count': 50}, '694': {'count': 50}, '695': {'count': 50}, '696': {'count': 50}, '697': {'count': 50}, '698': {'count': 50}, '699': {'count': 50}, '700': {'count': 50}, '701': {'count': 50}, '702': {'count': 50}, '703': {'count': 50}, '704': {'count': 50}, '705': {'count': 50}, '706': {'count': 50}, '707': {'count': 50}, '708': {'count': 50}, '709': {'count': 50}, '710': {'count': 50}, '711': {'count': 50}, '712': {'count': 50}, '713': {'count': 50}, '714': {'count': 50}, '715': {'count': 50}, '716': {'count': 50}, '717': {'count': 50}, '718': {'count': 50}, '719': {'count': 50}, '720': {'count': 50}, '721': {'count': 50}, '722': {'count': 50}, '723': {'count': 50}, '724': {'count': 50}, '725': {'count': 50}, '726': {'count': 50}, '727': {'count': 50}, '728': {'count': 50}, '729': {'count': 50}, '730': {'count': 50}, '731': {'count': 50}, '732': {'count': 50}, '733': {'count': 50}, '734': {'count': 50}, '735': {'count': 50}, '736': {'count': 50}, '737': {'count': 50}, '738': {'count': 50}, '739': {'count': 50}, '740': {'count': 50}, '741': {'count': 50}, '742': {'count': 50}, '743': {'count': 50}, '744': {'count': 50}, '745': {'count': 50}, '746': {'count': 50}, '747': {'count': 50}, '748': {'count': 50}, '749': {'count': 50}, '750': {'count': 50}, '751': {'count': 50}, '752': {'count': 50}, '753': {'count': 50}, '754': {'count': 50}, '755': {'count': 50}, '756': {'count': 50}, '757': {'count': 50}, '758': {'count': 50}, '759': {'count': 50}, '760': {'count': 50}, '761': {'count': 50}, '762': {'count': 50}, '763': {'count': 50}, '764': {'count': 50}, '765': {'count': 50}, '766': {'count': 50}, '767': {'count': 50}, '768': {'count': 50}, '769': {'count': 50}, '770': {'count': 50}, '771': {'count': 50}, '772': {'count': 50}, '773': {'count': 50}, '774': {'count': 50}, '775': {'count': 50}, '776': {'count': 50}, '777': {'count': 50}, '778': {'count': 50}, '779': {'count': 50}, '780': {'count': 50}, '781': {'count': 50}, '782': {'count': 50}, '783': {'count': 50}, '784': {'count': 50}, '785': {'count': 50}, '786': {'count': 50}, '787': {'count': 50}, '788': {'count': 50}, '789': {'count': 50}, '790': {'count': 50}, '791': {'count': 50}, '792': {'count': 50}, '793': {'count': 50}, '794': {'count': 50}, '795': {'count': 50}, '796': {'count': 50}, '797': {'count': 50}, '798': {'count': 50}, '799': {'count': 50}, '800': {'count': 50}, '801': {'count': 50}, '802': {'count': 50}, '803': {'count': 50}, '804': {'count': 50}, '805': {'count': 50}, '806': {'count': 50}, '807': {'count': 50}, '808': {'count': 50}, '809': {'count': 50}, '810': {'count': 50}, '811': {'count': 50}, '812': {'count': 50}, '813': {'count': 50}, '814': {'count': 50}, '815': {'count': 50}, '816': {'count': 50}, '817': {'count': 50}, '818': {'count': 50}, '819': {'count': 50}, '820': {'count': 50}, '821': {'count': 50}, '822': {'count': 50}, '823': {'count': 50}, '824': {'count': 50}, '825': {'count': 50}, '826': {'count': 50}, '827': {'count': 50}, '828': {'count': 50}, '829': {'count': 50}, '830': {'count': 50}, '831': {'count': 50}, '832': {'count': 50}, '833': {'count': 50}, '834': {'count': 50}, '835': {'count': 50}, '836': {'count': 50}, '837': {'count': 50}, '838': {'count': 50}, '839': {'count': 50}, '840': {'count': 50}, '841': {'count': 50}, '842': {'count': 50}, '843': {'count': 50}, '844': {'count': 50}, '845': {'count': 50}, '846': {'count': 50}, '847': {'count': 50}, '848': {'count': 50}, '849': {'count': 50}, '850': {'count': 50}, '851': {'count': 50}, '852': {'count': 50}, '853': {'count': 50}, '854': {'count': 50}, '855': {'count': 50}, '856': {'count': 50}, '857': {'count': 50}, '858': {'count': 50}, '859': {'count': 50}, '860': {'count': 50}, '861': {'count': 50}, '862': {'count': 50}, '863': {'count': 50}, '864': {'count': 50}, '865': {'count': 50}, '866': {'count': 50}, '867': {'count': 50}, '868': {'count': 50}, '869': {'count': 50}, '870': {'count': 50}, '871': {'count': 50}, '872': {'count': 50}, '873': {'count': 50}, '874': {'count': 50}, '875': {'count': 50}, '876': {'count': 50}, '877': {'count': 50}, '878': {'count': 50}, '879': {'count': 50}, '880': {'count': 50}, '881': {'count': 50}, '882': {'count': 50}, '883': {'count': 50}, '884': {'count': 50}, '885': {'count': 50}, '886': {'count': 50}, '887': {'count': 50}, '888': {'count': 50}, '889': {'count': 50}, '890': {'count': 50}, '891': {'count': 50}, '892': {'count': 50}, '893': {'count': 50}, '894': {'count': 50}, '895': {'count': 50}, '896': {'count': 50}, '897': {'count': 50}, '898': {'count': 50}, '899': {'count': 50}, '900': {'count': 50}, '901': {'count': 50}, '902': {'count': 50}, '903': {'count': 50}, '904': {'count': 50}, '905': {'count': 50}, '906': {'count': 50}, '907': {'count': 50}, '908': {'count': 50}, '909': {'count': 50}, '910': {'count': 50}, '911': {'count': 50}, '912': {'count': 50}, '913': {'count': 50}, '914': {'count': 50}, '915': {'count': 50}, '916': {'count': 50}, '917': {'count': 50}, '918': {'count': 50}, '919': {'count': 50}, '920': {'count': 50}, '921': {'count': 50}, '922': {'count': 50}, '923': {'count': 50}, '924': {'count': 50}, '925': {'count': 50}, '926': {'count': 50}, '927': {'count': 50}, '928': {'count': 50}, '929': {'count': 50}, '930': {'count': 50}, '931': {'count': 50}, '932': {'count': 50}, '933': {'count': 50}, '934': {'count': 50}, '935': {'count': 50}, '936': {'count': 50}, '937': {'count': 50}, '938': {'count': 50}, '939': {'count': 50}, '940': {'count': 50}, '941': {'count': 50}, '942': {'count': 50}, '943': {'count': 50}, '944': {'count': 50}, '945': {'count': 50}, '946': {'count': 50}, '947': {'count': 50}, '948': {'count': 50}, '949': {'count': 50}, '950': {'count': 50}, '951': {'count': 50}, '952': {'count': 50}, '953': {'count': 50}, '954': {'count': 50}, '955': {'count': 50}, '956': {'count': 50}, '957': {'count': 50}, '958': {'count': 50}, '959': {'count': 50}, '960': {'count': 50}, '961': {'count': 50}, '962': {'count': 50}, '963': {'count': 50}, '964': {'count': 50}, '965': {'count': 50}, '966': {'count': 50}, '967': {'count': 50}, '968': {'count': 50}, '969': {'count': 50}, '970': {'count': 50}, '971': {'count': 50}, '972': {'count': 50}, '973': {'count': 50}, '974': {'count': 50}, '975': {'count': 50}, '976': {'count': 50}, '977': {'count': 50}, '978': {'count': 50}, '979': {'count': 50}, '980': {'count': 50}, '981': {'count': 50}, '982': {'count': 50}, '983': {'count': 50}, '984': {'count': 50}, '985': {'count': 50}, '986': {'count': 50}, '987': {'count': 50}, '988': {'count': 50}, '989': {'count': 50}, '990': {'count': 50}, '991': {'count': 50}, '992': {'count': 50}, '993': {'count': 50}, '994': {'count': 50}, '995': {'count': 50}, '996': {'count': 50}, '997': {'count': 50}, '998': {'count': 50}, '999': {'count': 50}}}}
ImdbClassification ['eng'] Classification p2p [Reviews, Written] None None
InappropriatenessClassification ['rus'] Classification s2s [Social, Web, Written] None None
IndicCrosslingualSTS (Ramesh et al., 2022) ['asm', 'ben', 'eng', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] STS s2s [Government, News, Non-fiction, Spoken, Spoken, Web, Written] None None
IndicGenBenchFloresBitextMining (Harman Singh, 2024) ['asm', 'awa', 'ben', 'bgc', 'bho', 'bod', 'boy', 'eng', 'gbm', 'gom', 'guj', 'hin', 'hne', 'kan', 'mai', 'mal', 'mar', 'mni', 'mup', 'mwr', 'nep', 'ory', 'pan', 'pus', 'raj', 'san', 'sat', 'tam', 'tel', 'urd'] BitextMining s2s [News, Web, Written] {'validation': 57826, 'test': 58696} {'validation': {'num_samples': 57826, 'number_of_characters': 14600950, 'unique_pairs': 57826, 'min_sentence1_length': 24, 'average_sentence1_length': 126.25, 'max_sentence1_length': 368, 'unique_sentence1': 29903, 'min_sentence2_length': 24, 'average_sentence2_length': 126.24, 'max_sentence2_length': 368, 'unique_sentence2': 29903, 'hf_subset_descriptive_stats': {'ben-eng': {'num_samples': 997, 'number_of_characters': 248469, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 123.65, 'max_sentence1_length': 320, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-ben': {'num_samples': 997, 'number_of_characters': 248469, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 123.65, 'max_sentence2_length': 320, 'unique_sentence2': 997}, 'guj-eng': {'num_samples': 997, 'number_of_characters': 245477, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 120.64, 'max_sentence1_length': 368, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-guj': {'num_samples': 997, 'number_of_characters': 245477, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 120.64, 'max_sentence2_length': 368, 'unique_sentence2': 997}, 'hin-eng': {'num_samples': 997, 'number_of_characters': 250573, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 125.76, 'max_sentence1_length': 355, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-hin': {'num_samples': 997, 'number_of_characters': 250564, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 125.75, 'max_sentence2_length': 355, 'unique_sentence2': 997}, 'kan-eng': {'num_samples': 997, 'number_of_characters': 257131, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 132.33, 'max_sentence1_length': 331, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-kan': {'num_samples': 997, 'number_of_characters': 256986, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 132.19, 'max_sentence2_length': 331, 'unique_sentence2': 997}, 'mal-eng': {'num_samples': 997, 'number_of_characters': 267295, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 142.53, 'max_sentence1_length': 360, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mal': {'num_samples': 997, 'number_of_characters': 267296, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 142.53, 'max_sentence2_length': 360, 'unique_sentence2': 997}, 'mar-eng': {'num_samples': 997, 'number_of_characters': 251107, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 126.29, 'max_sentence1_length': 321, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mar': {'num_samples': 997, 'number_of_characters': 250897, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 126.08, 'max_sentence2_length': 321, 'unique_sentence2': 997}, 'tam-eng': {'num_samples': 997, 'number_of_characters': 271322, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 146.57, 'max_sentence1_length': 358, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-tam': {'num_samples': 997, 'number_of_characters': 271322, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 146.57, 'max_sentence2_length': 358, 'unique_sentence2': 997}, 'tel-eng': {'num_samples': 997, 'number_of_characters': 252385, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 127.57, 'max_sentence1_length': 317, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-tel': {'num_samples': 997, 'number_of_characters': 252380, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 127.57, 'max_sentence2_length': 317, 'unique_sentence2': 997}, 'urd-eng': {'num_samples': 997, 'number_of_characters': 249824, 'unique_pairs': 997, 'min_sentence1_length': 37, 'average_sentence1_length': 125.01, 'max_sentence1_length': 295, 'unique_sentence1': 996, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-urd': {'num_samples': 997, 'number_of_characters': 249824, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 37, 'average_sentence2_length': 125.01, 'max_sentence2_length': 295, 'unique_sentence2': 996}, 'asm-eng': {'num_samples': 997, 'number_of_characters': 246220, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 121.39, 'max_sentence1_length': 314, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-asm': {'num_samples': 997, 'number_of_characters': 246224, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 121.39, 'max_sentence2_length': 314, 'unique_sentence2': 997}, 'bho-eng': {'num_samples': 997, 'number_of_characters': 246895, 'unique_pairs': 997, 'min_sentence1_length': 25, 'average_sentence1_length': 122.07, 'max_sentence1_length': 326, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bho': {'num_samples': 997, 'number_of_characters': 246919, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 25, 'average_sentence2_length': 122.09, 'max_sentence2_length': 326, 'unique_sentence2': 997}, 'nep-eng': {'num_samples': 997, 'number_of_characters': 245984, 'unique_pairs': 997, 'min_sentence1_length': 24, 'average_sentence1_length': 121.15, 'max_sentence1_length': 307, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-nep': {'num_samples': 997, 'number_of_characters': 245984, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 24, 'average_sentence2_length': 121.15, 'max_sentence2_length': 307, 'unique_sentence2': 997}, 'ory-eng': {'num_samples': 997, 'number_of_characters': 254206, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 129.4, 'max_sentence1_length': 308, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-ory': {'num_samples': 997, 'number_of_characters': 254206, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 129.4, 'max_sentence2_length': 308, 'unique_sentence2': 997}, 'pan-eng': {'num_samples': 997, 'number_of_characters': 251598, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 126.78, 'max_sentence1_length': 309, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-pan': {'num_samples': 997, 'number_of_characters': 251597, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 126.78, 'max_sentence2_length': 309, 'unique_sentence2': 997}, 'pus-eng': {'num_samples': 997, 'number_of_characters': 247450, 'unique_pairs': 997, 'min_sentence1_length': 32, 'average_sentence1_length': 122.62, 'max_sentence1_length': 300, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-pus': {'num_samples': 997, 'number_of_characters': 247450, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 32, 'average_sentence2_length': 122.62, 'max_sentence2_length': 300, 'unique_sentence2': 997}, 'san-eng': {'num_samples': 997, 'number_of_characters': 249042, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 124.22, 'max_sentence1_length': 311, 'unique_sentence1': 994, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-san': {'num_samples': 997, 'number_of_characters': 248877, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 124.06, 'max_sentence2_length': 311, 'unique_sentence2': 994}, 'awa-eng': {'num_samples': 997, 'number_of_characters': 247944, 'unique_pairs': 997, 'min_sentence1_length': 34, 'average_sentence1_length': 123.12, 'max_sentence1_length': 329, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-awa': {'num_samples': 997, 'number_of_characters': 247884, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 34, 'average_sentence2_length': 123.06, 'max_sentence2_length': 329, 'unique_sentence2': 997}, 'bgc-eng': {'num_samples': 997, 'number_of_characters': 245935, 'unique_pairs': 997, 'min_sentence1_length': 27, 'average_sentence1_length': 121.1, 'max_sentence1_length': 303, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bgc': {'num_samples': 997, 'number_of_characters': 245935, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 27, 'average_sentence2_length': 121.1, 'max_sentence2_length': 303, 'unique_sentence2': 997}, 'bod-eng': {'num_samples': 997, 'number_of_characters': 266515, 'unique_pairs': 997, 'min_sentence1_length': 26, 'average_sentence1_length': 141.75, 'max_sentence1_length': 355, 'unique_sentence1': 996, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-bod': {'num_samples': 997, 'number_of_characters': 266495, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 26, 'average_sentence2_length': 141.73, 'max_sentence2_length': 355, 'unique_sentence2': 996}, 'boy-eng': {'num_samples': 997, 'number_of_characters': 260174, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 135.39, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-boy': {'num_samples': 997, 'number_of_characters': 260174, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 135.39, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'gbm-eng': {'num_samples': 997, 'number_of_characters': 247009, 'unique_pairs': 997, 'min_sentence1_length': 30, 'average_sentence1_length': 122.18, 'max_sentence1_length': 344, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-gbm': {'num_samples': 997, 'number_of_characters': 247009, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 30, 'average_sentence2_length': 122.18, 'max_sentence2_length': 344, 'unique_sentence2': 997}, 'gom-eng': {'num_samples': 997, 'number_of_characters': 244553, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 119.72, 'max_sentence1_length': 306, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-gom': {'num_samples': 997, 'number_of_characters': 244553, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 119.72, 'max_sentence2_length': 306, 'unique_sentence2': 997}, 'hne-eng': {'num_samples': 997, 'number_of_characters': 246416, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 121.59, 'max_sentence1_length': 321, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-hne': {'num_samples': 997, 'number_of_characters': 246405, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 121.58, 'max_sentence2_length': 321, 'unique_sentence2': 997}, 'raj-eng': {'num_samples': 997, 'number_of_characters': 249541, 'unique_pairs': 997, 'min_sentence1_length': 32, 'average_sentence1_length': 124.72, 'max_sentence1_length': 313, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-raj': {'num_samples': 997, 'number_of_characters': 249541, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 32, 'average_sentence2_length': 124.72, 'max_sentence2_length': 313, 'unique_sentence2': 997}, 'mai-eng': {'num_samples': 997, 'number_of_characters': 247991, 'unique_pairs': 997, 'min_sentence1_length': 29, 'average_sentence1_length': 123.17, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mai': {'num_samples': 997, 'number_of_characters': 247994, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 29, 'average_sentence2_length': 123.17, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'mni-eng': {'num_samples': 997, 'number_of_characters': 254308, 'unique_pairs': 997, 'min_sentence1_length': 39, 'average_sentence1_length': 129.5, 'max_sentence1_length': 310, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mni': {'num_samples': 997, 'number_of_characters': 254312, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 39, 'average_sentence2_length': 129.51, 'max_sentence2_length': 310, 'unique_sentence2': 997}, 'mup-eng': {'num_samples': 997, 'number_of_characters': 248486, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 123.66, 'max_sentence1_length': 312, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mup': {'num_samples': 997, 'number_of_characters': 248486, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 123.66, 'max_sentence2_length': 312, 'unique_sentence2': 997}, 'mwr-eng': {'num_samples': 997, 'number_of_characters': 248641, 'unique_pairs': 997, 'min_sentence1_length': 31, 'average_sentence1_length': 123.82, 'max_sentence1_length': 324, 'unique_sentence1': 997, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-mwr': {'num_samples': 997, 'number_of_characters': 248641, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 31, 'average_sentence2_length': 123.82, 'max_sentence2_length': 324, 'unique_sentence2': 997}, 'sat-eng': {'num_samples': 997, 'number_of_characters': 258279, 'unique_pairs': 997, 'min_sentence1_length': 37, 'average_sentence1_length': 133.49, 'max_sentence1_length': 333, 'unique_sentence1': 995, 'min_sentence2_length': 28, 'average_sentence2_length': 125.57, 'max_sentence2_length': 297, 'unique_sentence2': 997}, 'eng-sat': {'num_samples': 997, 'number_of_characters': 258279, 'unique_pairs': 997, 'min_sentence1_length': 28, 'average_sentence1_length': 125.57, 'max_sentence1_length': 297, 'unique_sentence1': 997, 'min_sentence2_length': 37, 'average_sentence2_length': 133.49, 'max_sentence2_length': 333, 'unique_sentence2': 995}}}, 'test': {'num_samples': 58696, 'number_of_characters': 15359416, 'unique_pairs': 58690, 'min_sentence1_length': 33, 'average_sentence1_length': 130.84, 'max_sentence1_length': 431, 'unique_sentence1': 30351, 'min_sentence2_length': 33, 'average_sentence2_length': 130.83, 'max_sentence2_length': 431, 'unique_sentence2': 30351, 'hf_subset_descriptive_stats': {'ben-eng': {'num_samples': 1012, 'number_of_characters': 261008, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 127.51, 'max_sentence1_length': 333, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-ben': {'num_samples': 1012, 'number_of_characters': 261008, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 127.51, 'max_sentence2_length': 333, 'unique_sentence2': 1012}, 'guj-eng': {'num_samples': 1012, 'number_of_characters': 258394, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 124.93, 'max_sentence1_length': 349, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-guj': {'num_samples': 1012, 'number_of_characters': 258394, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 124.93, 'max_sentence2_length': 349, 'unique_sentence2': 1012}, 'hin-eng': {'num_samples': 1012, 'number_of_characters': 263040, 'unique_pairs': 1012, 'min_sentence1_length': 41, 'average_sentence1_length': 129.52, 'max_sentence1_length': 381, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-hin': {'num_samples': 1012, 'number_of_characters': 263029, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 41, 'average_sentence2_length': 129.51, 'max_sentence2_length': 381, 'unique_sentence2': 1012}, 'kan-eng': {'num_samples': 1012, 'number_of_characters': 270091, 'unique_pairs': 1012, 'min_sentence1_length': 43, 'average_sentence1_length': 136.49, 'max_sentence1_length': 388, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-kan': {'num_samples': 1012, 'number_of_characters': 270021, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 43, 'average_sentence2_length': 136.42, 'max_sentence2_length': 388, 'unique_sentence2': 1012}, 'mal-eng': {'num_samples': 1012, 'number_of_characters': 281302, 'unique_pairs': 1012, 'min_sentence1_length': 48, 'average_sentence1_length': 147.57, 'max_sentence1_length': 376, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mal': {'num_samples': 1012, 'number_of_characters': 281302, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 48, 'average_sentence2_length': 147.57, 'max_sentence2_length': 376, 'unique_sentence2': 1012}, 'mar-eng': {'num_samples': 1012, 'number_of_characters': 265212, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 131.67, 'max_sentence1_length': 356, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mar': {'num_samples': 1012, 'number_of_characters': 265023, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 131.48, 'max_sentence2_length': 355, 'unique_sentence2': 1012}, 'tam-eng': {'num_samples': 1012, 'number_of_characters': 286099, 'unique_pairs': 1012, 'min_sentence1_length': 48, 'average_sentence1_length': 152.31, 'max_sentence1_length': 404, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-tam': {'num_samples': 1012, 'number_of_characters': 286099, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 48, 'average_sentence2_length': 152.31, 'max_sentence2_length': 404, 'unique_sentence2': 1012}, 'tel-eng': {'num_samples': 1012, 'number_of_characters': 264460, 'unique_pairs': 1012, 'min_sentence1_length': 39, 'average_sentence1_length': 130.92, 'max_sentence1_length': 359, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-tel': {'num_samples': 1012, 'number_of_characters': 264447, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 39, 'average_sentence2_length': 130.91, 'max_sentence2_length': 359, 'unique_sentence2': 1012}, 'urd-eng': {'num_samples': 1012, 'number_of_characters': 261886, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 128.38, 'max_sentence1_length': 348, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-urd': {'num_samples': 1012, 'number_of_characters': 261885, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 128.38, 'max_sentence2_length': 348, 'unique_sentence2': 1012}, 'asm-eng': {'num_samples': 1012, 'number_of_characters': 257902, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 124.44, 'max_sentence1_length': 329, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-asm': {'num_samples': 1012, 'number_of_characters': 257909, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 124.45, 'max_sentence2_length': 329, 'unique_sentence2': 1012}, 'bho-eng': {'num_samples': 1012, 'number_of_characters': 260578, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 127.09, 'max_sentence1_length': 367, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-bho': {'num_samples': 1012, 'number_of_characters': 260601, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 127.11, 'max_sentence2_length': 367, 'unique_sentence2': 1012}, 'nep-eng': {'num_samples': 1012, 'number_of_characters': 258869, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 125.4, 'max_sentence1_length': 362, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-nep': {'num_samples': 1012, 'number_of_characters': 258869, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 125.4, 'max_sentence2_length': 362, 'unique_sentence2': 1012}, 'ory-eng': {'num_samples': 1012, 'number_of_characters': 266805, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 133.24, 'max_sentence1_length': 354, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-ory': {'num_samples': 1012, 'number_of_characters': 266805, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 133.24, 'max_sentence2_length': 354, 'unique_sentence2': 1012}, 'pan-eng': {'num_samples': 1012, 'number_of_characters': 265391, 'unique_pairs': 1012, 'min_sentence1_length': 37, 'average_sentence1_length': 131.84, 'max_sentence1_length': 380, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-pan': {'num_samples': 1012, 'number_of_characters': 265391, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 37, 'average_sentence2_length': 131.84, 'max_sentence2_length': 380, 'unique_sentence2': 1012}, 'pus-eng': {'num_samples': 1012, 'number_of_characters': 254422, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 121.0, 'max_sentence1_length': 325, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-pus': {'num_samples': 1012, 'number_of_characters': 254421, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 121.0, 'max_sentence2_length': 325, 'unique_sentence2': 1012}, 'san-eng': {'num_samples': 1012, 'number_of_characters': 260339, 'unique_pairs': 1012, 'min_sentence1_length': 33, 'average_sentence1_length': 126.85, 'max_sentence1_length': 358, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-san': {'num_samples': 1012, 'number_of_characters': 260224, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 33, 'average_sentence2_length': 126.74, 'max_sentence2_length': 358, 'unique_sentence2': 1011}, 'awa-eng': {'num_samples': 1012, 'number_of_characters': 260179, 'unique_pairs': 1012, 'min_sentence1_length': 34, 'average_sentence1_length': 126.69, 'max_sentence1_length': 378, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-awa': {'num_samples': 1012, 'number_of_characters': 260137, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 34, 'average_sentence2_length': 126.65, 'max_sentence2_length': 378, 'unique_sentence2': 1012}, 'bgc-eng': {'num_samples': 1012, 'number_of_characters': 257450, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 124.0, 'max_sentence1_length': 332, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-bgc': {'num_samples': 1012, 'number_of_characters': 257450, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 124.0, 'max_sentence2_length': 332, 'unique_sentence2': 1012}, 'bod-eng': {'num_samples': 1012, 'number_of_characters': 280188, 'unique_pairs': 1012, 'min_sentence1_length': 42, 'average_sentence1_length': 146.46, 'max_sentence1_length': 431, 'unique_sentence1': 1009, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-bod': {'num_samples': 1012, 'number_of_characters': 280126, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 42, 'average_sentence2_length': 146.4, 'max_sentence2_length': 431, 'unique_sentence2': 1009}, 'boy-eng': {'num_samples': 1012, 'number_of_characters': 277538, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 143.85, 'max_sentence1_length': 396, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-boy': {'num_samples': 1012, 'number_of_characters': 277538, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 143.85, 'max_sentence2_length': 396, 'unique_sentence2': 1011}, 'gbm-eng': {'num_samples': 1012, 'number_of_characters': 261027, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 127.53, 'max_sentence1_length': 333, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-gbm': {'num_samples': 1012, 'number_of_characters': 261027, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 127.53, 'max_sentence2_length': 333, 'unique_sentence2': 1012}, 'gom-eng': {'num_samples': 1012, 'number_of_characters': 259182, 'unique_pairs': 1012, 'min_sentence1_length': 37, 'average_sentence1_length': 125.71, 'max_sentence1_length': 335, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-gom': {'num_samples': 1012, 'number_of_characters': 259182, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 37, 'average_sentence2_length': 125.71, 'max_sentence2_length': 335, 'unique_sentence2': 1012}, 'hne-eng': {'num_samples': 1012, 'number_of_characters': 258911, 'unique_pairs': 1012, 'min_sentence1_length': 42, 'average_sentence1_length': 125.44, 'max_sentence1_length': 327, 'unique_sentence1': 1011, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-hne': {'num_samples': 1012, 'number_of_characters': 258915, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 42, 'average_sentence2_length': 125.44, 'max_sentence2_length': 326, 'unique_sentence2': 1011}, 'raj-eng': {'num_samples': 1012, 'number_of_characters': 261987, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 128.48, 'max_sentence1_length': 338, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-raj': {'num_samples': 1012, 'number_of_characters': 261987, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 128.48, 'max_sentence2_length': 338, 'unique_sentence2': 1012}, 'mai-eng': {'num_samples': 1012, 'number_of_characters': 261374, 'unique_pairs': 1012, 'min_sentence1_length': 36, 'average_sentence1_length': 127.87, 'max_sentence1_length': 350, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mai': {'num_samples': 1012, 'number_of_characters': 261377, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 36, 'average_sentence2_length': 127.88, 'max_sentence2_length': 350, 'unique_sentence2': 1012}, 'mni-eng': {'num_samples': 1012, 'number_of_characters': 268767, 'unique_pairs': 1012, 'min_sentence1_length': 38, 'average_sentence1_length': 135.18, 'max_sentence1_length': 353, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mni': {'num_samples': 1012, 'number_of_characters': 268768, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 38, 'average_sentence2_length': 135.18, 'max_sentence2_length': 354, 'unique_sentence2': 1012}, 'mup-eng': {'num_samples': 1012, 'number_of_characters': 262034, 'unique_pairs': 1012, 'min_sentence1_length': 40, 'average_sentence1_length': 128.53, 'max_sentence1_length': 340, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mup': {'num_samples': 1012, 'number_of_characters': 262034, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 40, 'average_sentence2_length': 128.53, 'max_sentence2_length': 340, 'unique_sentence2': 1012}, 'mwr-eng': {'num_samples': 1012, 'number_of_characters': 263749, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.22, 'max_sentence1_length': 345, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-mwr': {'num_samples': 1012, 'number_of_characters': 263749, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.22, 'max_sentence2_length': 345, 'unique_sentence2': 1012}, 'sat-eng': {'num_samples': 1012, 'number_of_characters': 271757, 'unique_pairs': 1012, 'min_sentence1_length': 43, 'average_sentence1_length': 138.13, 'max_sentence1_length': 366, 'unique_sentence1': 1012, 'min_sentence2_length': 35, 'average_sentence2_length': 130.4, 'max_sentence2_length': 368, 'unique_sentence2': 1012}, 'eng-sat': {'num_samples': 1012, 'number_of_characters': 271757, 'unique_pairs': 1012, 'min_sentence1_length': 35, 'average_sentence1_length': 130.4, 'max_sentence1_length': 368, 'unique_sentence1': 1012, 'min_sentence2_length': 43, 'average_sentence2_length': 138.13, 'max_sentence2_length': 366, 'unique_sentence2': 1012}}}}
IndicLangClassification ['asm', 'ben', 'brx', 'doi', 'gom', 'guj', 'hin', 'kan', 'kas', 'mai', 'mal', 'mar', 'mni', 'npi', 'ory', 'pan', 'san', 'sat', 'snd', 'tam', 'tel', 'urd'] Classification s2s [Non-fiction, Web, Written] None None
IndicNLPNewsClassification (Anoop Kunchukuttan, 2020) ['guj', 'kan', 'mal', 'mar', 'ori', 'pan', 'tam', 'tel'] Classification s2s [News, Written] None None
IndicQARetrieval (Sumanth Doddapaneni, 2022) ['asm', 'ben', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel'] Retrieval s2p [Web, Written] None None
IndicReviewsClusteringP2P (Sumanth Doddapaneni, 2022) ['asm', 'ben', 'brx', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] Clustering p2p [Reviews, Written] None None
IndicSentimentClassification (Sumanth Doddapaneni, 2022) ['asm', 'ben', 'brx', 'guj', 'hin', 'kan', 'mal', 'mar', 'ory', 'pan', 'tam', 'tel', 'urd'] Classification s2s [Reviews, Written] None None
IndonesianIdClickbaitClassification ['ind'] Classification s2s [News, Written] None None
IndonesianMongabayConservationClassification ['ind'] Classification s2s [Web, Written] None None
InfoSeekIT2ITRetrieval (Chen et al., 2023) ['eng'] Any2AnyRetrieval it2it [Encyclopaedic] {'test': 499375} {'test': {'number_of_characters': 291755841, 'num_samples': 499375, 'num_queries': 17593, 'num_documents': 481782, 'min_document_length': 8, 'average_document_length': 603.95, 'max_document_length': 4062, 'unique_documents': 481782, 'num_document_images': 481782, 'min_query_length': 21, 'average_query_length': 44.41, 'max_query_length': 87, 'unique_queries': 350, 'num_query_images': 17593, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 7.47, 'max_relevant_docs_per_query': 128, 'unique_relevant_docs': 7891}}
InfoSeekIT2TRetrieval (Chen et al., 2023) ['eng'] Any2AnyRetrieval it2t [Encyclopaedic] {'test': 622974} {'test': {'number_of_characters': 360966739, 'num_samples': 622974, 'num_queries': 11323, 'num_documents': 611651, 'min_document_length': 9, 'average_document_length': 589.22, 'max_document_length': 7650, 'unique_documents': 611651, 'num_document_images': 0, 'min_query_length': 23, 'average_query_length': 50.18, 'max_query_length': 270, 'unique_queries': 269, 'num_query_images': 11323, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 6.52, 'max_relevant_docs_per_query': 66, 'unique_relevant_docs': 7799}}
InsurancePolicyInterpretationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
InternationalCitizenshipQuestionsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
IsiZuluNewsClassification (Madodonga et al., 2023) ['zul'] Classification s2s [News, Written] None None
ItaCaseholdClassification (Licari et al., 2023) ['ita'] Classification s2s [Government, Legal, Written] None None
Itacola ['ita'] Classification s2s [Non-fiction, Spoken, Written] None None
JCrewBlockerLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
JDReview (Xiao et al., 2023) ['cmn'] Classification s2s None None
JSICK (Yanaka et al., 2022) ['jpn'] STS s2s [Web, Written] None None
JSTS ['jpn'] STS s2s [Web, Written] None None
JaGovFaqsRetrieval ['jpn'] Retrieval s2s [Web, Written] None None
JaQuADRetrieval (ByungHoon So, 2022) ['jpn'] Retrieval p2p [Encyclopaedic, Non-fiction, Written] None None
JaqketRetrieval ['jpn'] Retrieval s2p [Encyclopaedic, Non-fiction, Written] {'test': 115226} {'test': {'number_of_characters': 428294530, 'num_samples': 115226, 'num_queries': 997, 'num_documents': 114229, 'min_document_length': 16, 'average_document_length': 0.44, 'max_document_length': 98, 'unique_documents': 114229, 'min_query_length': 8, 'average_query_length': 429532.57, 'max_query_length': 188424, 'unique_queries': 997, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 989}}
JavaneseIMDBClassification (Wongso et al., 2021) ['jav'] Classification s2s [Reviews, Written] None None
KLUE-NLI (Sungjoon Park, 2021) ['kor'] PairClassification s2s [Encyclopaedic, News, Written] None None
KLUE-STS (Sungjoon Park, 2021) ['kor'] STS s2s [News, Reviews, Spoken, Spoken, Written] None None
KLUE-TC (Sungjoon Park, 2021) ['kor'] Classification s2s [News, Written] None None
KannadaNewsClassification (Anoop Kunchukuttan, 2020) ['kan'] Classification s2s [News, Written] None None
KinopoiskClassification (Blinov et al., 2013) ['rus'] Classification p2p [Reviews, Written] None None
KlueMrcDomainClustering (Sungjoon Park, 2021) ['kor'] Clustering p2p [News, Written] None None
KlueYnatMrcCategoryClustering (Sungjoon Park, 2021) ['kor'] Clustering s2s [News, Written] None None
Ko-StrategyQA (Geva et al., 2021) ['kor'] Retrieval s2p None None
KorFin (Son et al., 2023) ['kor'] Classification s2s [Financial, News, Written] None None
KorHateClassification (Jihyung Moon, 2020) ['kor'] Classification s2s [Social, Written] None None
KorHateSpeechMLClassification ['kor'] MultilabelClassification s2s [Social, Written] None None
KorSTS (Ham et al., 2020) ['kor'] STS s2s [News, Web] None None
KorSarcasmClassification (Kim et al., 2019) ['kor'] Classification s2s [Social, Written] None None
KurdishSentimentClassification (Badawi et al., 2024) ['kur'] Classification s2s [Web, Written] None None
LCQMC (Shitao Xiao, 2024) ['cmn'] STS s2s None None
LEMBNarrativeQARetrieval ['eng'] Retrieval s2p [Fiction, Non-fiction, Written] None None
LEMBNeedleRetrieval (Zhu et al., 2024) ['eng'] Retrieval s2p [Academic, Blog, Written] None None
LEMBPasskeyRetrieval (Zhu et al., 2024) ['eng'] Retrieval s2p [Fiction, Written] None None
LEMBQMSumRetrieval ['eng'] Retrieval s2p [Spoken, Written] None None
LEMBSummScreenFDRetrieval ['eng'] Retrieval s2p [Spoken, Written] None None
LEMBWikimQARetrieval (Ho et al., 2020) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
LLaVAIT2TRetrieval ['eng'] Any2AnyRetrieval it2t [Encyclopaedic] {'test': 11114} {'test': {'number_of_characters': 3578932, 'num_samples': 11114, 'num_queries': 5120, 'num_documents': 5994, 'min_document_length': 17, 'average_document_length': 546.19, 'max_document_length': 2562, 'unique_documents': 5994, 'num_document_images': 0, 'min_query_length': 21, 'average_query_length': 59.58, 'max_query_length': 165, 'unique_queries': 3906, 'num_query_images': 5120, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 5108}}
LanguageClassification (Conneau et al., 2018) ['ara', 'bul', 'cmn', 'deu', 'ell', 'eng', 'fra', 'hin', 'ita', 'jpn', 'nld', 'pol', 'por', 'rus', 'spa', 'swa', 'tha', 'tur', 'urd', 'vie'] Classification s2s [Fiction, Government, Non-fiction, Reviews, Web, Written] {'test': 2048, 'train': 70000} {'test': {'num_samples': 2048, 'number_of_characters': 224352, 'num_texts_in_train': 31, 'min_text_length': 14, 'average_text_length': 109.55, 'max_text_length': 1270, 'unique_text': 2025, 'unique_labels': 20, 'labels': {'17': {'count': 102}, '0': {'count': 102}, '11': {'count': 102}, '4': {'count': 103}, '3': {'count': 102}, '1': {'count': 102}, '10': {'count': 102}, '2': {'count': 103}, '16': {'count': 103}, '9': {'count': 103}, '5': {'count': 102}, '7': {'count': 102}, '13': {'count': 102}, '14': {'count': 103}, '12': {'count': 102}, '15': {'count': 103}, '19': {'count': 102}, '18': {'count': 102}, '6': {'count': 103}, '8': {'count': 103}}}, 'train': {'num_samples': 70000, 'number_of_characters': 7760299, 'num_texts_in_train': None, 'min_text_length': 2, 'average_text_length': 110.86, 'max_text_length': 2422, 'unique_text': 68978, 'unique_labels': 20, 'labels': {'12': {'count': 3500}, '1': {'count': 3500}, '19': {'count': 3500}, '15': {'count': 3500}, '13': {'count': 3500}, '11': {'count': 3500}, '17': {'count': 3500}, '14': {'count': 3500}, '16': {'count': 3500}, '5': {'count': 3500}, '0': {'count': 3500}, '8': {'count': 3500}, '7': {'count': 3500}, '2': {'count': 3500}, '3': {'count': 3500}, '10': {'count': 3500}, '6': {'count': 3500}, '18': {'count': 3500}, '4': {'count': 3500}, '9': {'count': 3500}}}}
LccSentimentClassification ['dan'] Classification s2s [News, Web, Written] None None
LeCaRDv2 (Haitao Li, 2023) ['zho'] Retrieval p2p [Legal, Written] None None
LearnedHandsBenefitsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsBusinessLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsConsumerLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsCourtsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsCrimeLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsDivorceLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsDomesticViolenceLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsEducationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsEmploymentLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsEstatesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsFamilyLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsHealthLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsHousingLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsImmigrationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsTortsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LearnedHandsTrafficLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LegalBenchConsumerContractsQA (Koreeda et al., 2021) ['eng'] Retrieval s2p [Legal, Written] None None
LegalBenchCorporateLobbying (Neel Guha, 2023) ['eng'] Retrieval s2p [Legal, Written] None None
LegalBenchPC (Neel Guha, 2023) ['eng'] PairClassification s2s [Legal, Written] None None
LegalQuAD (Hoppe et al., 2021) ['deu'] Retrieval s2p [Legal, Written] None None
LegalReasoningCausalityLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
LegalSummarization ['eng'] Retrieval s2p [Legal, Written] None None
LinceMTBitextMining (Aguilar et al., 2020) ['eng', 'hin'] BitextMining s2s [Social, Written] None None
LitSearchRetrieval (Ajith et al., 2024) ['eng'] Retrieval s2p [Academic, Non-fiction, Written] None None
LivedoorNewsClustering.v2 ['jpn'] Clustering s2s [News, Written] None None
MAUDLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
METI2IRetrieval (Ypsilantis et al., 2021) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 348597} {'test': {'number_of_characters': 0, 'num_samples': 348597, 'num_queries': 87942, 'num_documents': 260655, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 260655, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 87942, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.96, 'max_relevant_docs_per_query': 9, 'unique_relevant_docs': 172713}}
MIRACLReranking (Zhang et al., 2023) ['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] Reranking s2s [Encyclopaedic, Written] None None
MIRACLRetrieval (Zhang et al., 2023) ['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] Retrieval s2p [Encyclopaedic, Written] None None
MIRACLRetrievalHardNegatives (Zhang et al., 2023) ['ara', 'ben', 'deu', 'eng', 'fas', 'fin', 'fra', 'hin', 'ind', 'jpn', 'kor', 'rus', 'spa', 'swa', 'tel', 'tha', 'yor', 'zho'] Retrieval s2p [Encyclopaedic, Written] None None
MLQARetrieval ['ara', 'deu', 'eng', 'hin', 'spa', 'vie', 'zho'] Retrieval s2p [Encyclopaedic, Written] None None
MLQuestions ['eng'] Retrieval s2p [Academic, Encyclopaedic, Written] None None
MLSUMClusteringP2P.v2 (Scialom et al., 2020) ['deu', 'fra', 'rus', 'spa'] Clustering p2p [News, Written] None None
MLSUMClusteringS2S.v2 (Scialom et al., 2020) ['deu', 'fra', 'rus', 'spa'] Clustering s2s [News, Written] None None
MMarcoReranking (Luiz Henrique Bonifacio, 2021) ['cmn'] Reranking s2s None None
MMarcoRetrieval (Shitao Xiao, 2024) ['cmn'] Retrieval s2p None None
MNIST (LeCun et al., 2010) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 10, 'min_image_width': 28, 'average_image_width': 28.0, 'max_image_width': 28, 'min_image_height': 28, 'average_image_height': 28.0, 'max_image_height': 28, 'labels': {'7': {'count': 1028}, '2': {'count': 1032}, '1': {'count': 1135}, '0': {'count': 980}, '4': {'count': 982}, '9': {'count': 1009}, '5': {'count': 892}, '6': {'count': 958}, '3': {'count': 1010}, '8': {'count': 974}}}}
MNISTZeroShot (LeCun et al., 2010) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 10000} {'test': {'num_samples': 10000, 'unique_num_labels': 10, 'min_image_width': 28, 'average_image_width': 28.0, 'max_image_width': 28, 'min_image_height': 28, 'average_image_height': 28.0, 'max_image_height': 28, 'min_label_text_length': 27, 'average_label_text_length': 27.0, 'max_label_text_length': 27, 'labels': {'7': {'count': 1028}, '2': {'count': 1032}, '1': {'count': 1135}, '0': {'count': 980}, '4': {'count': 982}, '9': {'count': 1009}, '5': {'count': 892}, '6': {'count': 958}, '3': {'count': 1010}, '8': {'count': 974}}}}
MSCOCOI2TRetrieval (Lin et al., 2014) ['eng'] Any2AnyRetrieval i2t [Encyclopaedic] {'test': 29809} {'test': {'number_of_characters': 1303643, 'num_samples': 29809, 'num_queries': 5000, 'num_documents': 24809, 'min_document_length': 27, 'average_document_length': 52.55, 'max_document_length': 243, 'unique_documents': 24809, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 5000, 'min_relevant_docs_per_query': 4, 'average_relevant_docs_per_query': 5.0, 'max_relevant_docs_per_query': 5, 'unique_relevant_docs': 24809}}
MSCOCOT2IRetrieval (Lin et al., 2014) ['eng'] Any2AnyRetrieval t2i [Encyclopaedic] {'test': 29809} {'test': {'number_of_characters': 1303643, 'num_samples': 29809, 'num_queries': 24809, 'num_documents': 5000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 5000, 'min_query_length': 27, 'average_query_length': 52.55, 'max_query_length': 243, 'unique_queries': 24809, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.01, 'max_relevant_docs_per_query': 8, 'unique_relevant_docs': 5000}}
MSMARCO (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) ['eng'] Retrieval s2p [Academic, Blog, Encyclopaedic, Government, Medical, News, Non-fiction, Reviews, Social, Web] None None
MSMARCO-Fa ['fas'] Retrieval s2p [Web] None None
MSMARCO-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p [Web, Written] None None
MSMARCO-PLHardNegatives (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p [Web, Written] None None
MSMARCOHardNegatives (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) ['eng'] Retrieval s2p [Academic, Blog, Encyclopaedic, Government, Medical, News, Non-fiction, Reviews, Social, Web] None None
MSMARCOv2 (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) ['eng'] Retrieval s2p [Academic, Blog, Encyclopaedic, Government, Medical, News, Non-fiction, Reviews, Social, Web] None None
MTOPDomainClassification ['deu', 'eng', 'fra', 'hin', 'spa', 'tha'] Classification s2s [Spoken, Spoken] None None
MTOPIntentClassification ['deu', 'eng', 'fra', 'hin', 'spa', 'tha'] Classification s2s [Spoken, Spoken] None None
MacedonianTweetSentimentClassification ['mkd'] Classification s2s [Social, Written] None None
MalayalamNewsClassification (Anoop Kunchukuttan, 2020) ['mal'] Classification s2s [News, Written] None None
MalteseNewsClassification ['mlt'] MultilabelClassification s2s [Constructed, Written] None None
MarathiNewsClassification (Anoop Kunchukuttan, 2020) ['mar'] Classification s2s [News, Written] None None
MasakhaNEWSClassification (David Ifeoluwa Adelani, 2023) ['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] Classification s2s [News, Written] None None
MasakhaNEWSClusteringP2P (David Ifeoluwa Adelani, 2023) ['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] Clustering p2p [News, Non-fiction, Written] None None
MasakhaNEWSClusteringS2S (David Ifeoluwa Adelani, 2023) ['amh', 'eng', 'fra', 'hau', 'ibo', 'lin', 'lug', 'orm', 'pcm', 'run', 'sna', 'som', 'swa', 'tir', 'xho', 'yor'] Clustering s2s None None
MassiveIntentClassification (Jack FitzGerald, 2022) ['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] Classification s2s [Spoken] None None
MassiveScenarioClassification (Jack FitzGerald, 2022) ['afr', 'amh', 'ara', 'aze', 'ben', 'cmo', 'cym', 'dan', 'deu', 'ell', 'eng', 'fas', 'fin', 'fra', 'heb', 'hin', 'hun', 'hye', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kan', 'kat', 'khm', 'kor', 'lav', 'mal', 'mon', 'msa', 'mya', 'nld', 'nob', 'pol', 'por', 'ron', 'rus', 'slv', 'spa', 'sqi', 'swa', 'swe', 'tam', 'tel', 'tgl', 'tha', 'tur', 'urd', 'vie'] Classification s2s [Spoken] None None
MedicalQARetrieval (Asma et al., 2019) ['eng'] Retrieval s2s [Medical, Written] None None
MedicalRetrieval ['cmn'] Retrieval s2p None None
MedrxivClusteringP2P.v2 ['eng'] Clustering p2p [Academic, Medical, Written] {'test': 37500} {'test': {'num_samples': 37500, 'number_of_characters': 74294927, 'min_text_length': 148, 'average_text_length': 1981.2, 'max_text_length': 38759, 'min_labels_per_text': 6, 'average_labels_per_text': 1.0, 'max_labels_per_text': 8830, 'unique_labels': 51, 'labels': {'epidemiology': {'count': 6656}, 'public and global health': {'count': 3595}, 'oncology': {'count': 845}, 'allergy and immunology': {'count': 464}, 'orthopedics': {'count': 104}, 'health informatics': {'count': 1107}, 'occupational and environmental health': {'count': 415}, 'infectious diseases': {'count': 8830}, 'genetic and genomic medicine': {'count': 1918}, 'health policy': {'count': 527}, 'gastroenterology': {'count': 343}, 'radiology and imaging': {'count': 541}, 'pain medicine': {'count': 121}, 'neurology': {'count': 1773}, 'primary care research': {'count': 232}, 'rheumatology': {'count': 189}, 'endocrinology': {'count': 419}, 'hematology': {'count': 202}, 'addiction medicine': {'count': 178}, 'pediatrics': {'count': 589}, 'cardiovascular medicine': {'count': 855}, 'obstetrics and gynecology': {'count': 373}, 'health systems and quality improvement': {'count': 491}, 'nephrology': {'count': 241}, 'respiratory medicine': {'count': 482}, 'geriatric medicine': {'count': 169}, 'dentistry and oral medicine': {'count': 159}, 'psychiatry and clinical psychology': {'count': 1781}, 'nutrition': {'count': 240}, 'intensive care and critical care medicine': {'count': 368}, 'rehabilitation medicine and physical therapy': {'count': 322}, 'otolaryngology': {'count': 166}, 'nursing': {'count': 93}, 'transplantation': {'count': 118}, 'health economics': {'count': 327}, 'sports medicine': {'count': 180}, 'hiv aids': {'count': 363}, 'dermatology': {'count': 98}, 'pathology': {'count': 223}, 'emergency medicine': {'count': 191}, 'pharmacology and therapeutics': {'count': 221}, 'ophthalmology': {'count': 220}, 'medical ethics': {'count': 46}, 'palliative medicine': {'count': 45}, 'sexual and reproductive health': {'count': 156}, 'medical education': {'count': 203}, 'surgery': {'count': 162}, 'urology': {'count': 65}, 'anesthesia': {'count': 72}, 'toxicology': {'count': 16}, 'forensic medicine': {'count': 6}}}}
MedrxivClusteringS2S.v2 ['eng'] Clustering s2s [Academic, Medical, Written] {'test': 37500} {'test': {'num_samples': 37500, 'number_of_characters': 4301276, 'min_text_length': 18, 'average_text_length': 114.7, 'max_text_length': 339, 'min_labels_per_text': 6, 'average_labels_per_text': 1.0, 'max_labels_per_text': 8830, 'unique_labels': 51, 'labels': {'epidemiology': {'count': 6656}, 'public and global health': {'count': 3595}, 'oncology': {'count': 845}, 'allergy and immunology': {'count': 464}, 'orthopedics': {'count': 104}, 'health informatics': {'count': 1107}, 'occupational and environmental health': {'count': 415}, 'infectious diseases': {'count': 8830}, 'genetic and genomic medicine': {'count': 1918}, 'health policy': {'count': 527}, 'gastroenterology': {'count': 343}, 'radiology and imaging': {'count': 541}, 'pain medicine': {'count': 121}, 'neurology': {'count': 1773}, 'primary care research': {'count': 232}, 'rheumatology': {'count': 189}, 'endocrinology': {'count': 419}, 'hematology': {'count': 202}, 'addiction medicine': {'count': 178}, 'pediatrics': {'count': 589}, 'cardiovascular medicine': {'count': 855}, 'obstetrics and gynecology': {'count': 373}, 'health systems and quality improvement': {'count': 491}, 'nephrology': {'count': 241}, 'respiratory medicine': {'count': 482}, 'geriatric medicine': {'count': 169}, 'dentistry and oral medicine': {'count': 159}, 'psychiatry and clinical psychology': {'count': 1781}, 'nutrition': {'count': 240}, 'intensive care and critical care medicine': {'count': 368}, 'rehabilitation medicine and physical therapy': {'count': 322}, 'otolaryngology': {'count': 166}, 'nursing': {'count': 93}, 'transplantation': {'count': 118}, 'health economics': {'count': 327}, 'sports medicine': {'count': 180}, 'hiv aids': {'count': 363}, 'dermatology': {'count': 98}, 'pathology': {'count': 223}, 'emergency medicine': {'count': 191}, 'pharmacology and therapeutics': {'count': 221}, 'ophthalmology': {'count': 220}, 'medical ethics': {'count': 46}, 'palliative medicine': {'count': 45}, 'sexual and reproductive health': {'count': 156}, 'medical education': {'count': 203}, 'surgery': {'count': 162}, 'urology': {'count': 65}, 'anesthesia': {'count': 72}, 'toxicology': {'count': 16}, 'forensic medicine': {'count': 6}}}}
MemotionI2TRetrieval (Sharma et al., 2020) ['eng'] Any2AnyRetrieval i2t [Encyclopaedic] {'test': 7685} {'test': {'number_of_characters': 578340, 'num_samples': 7685, 'num_queries': 697, 'num_documents': 6988, 'min_document_length': 0, 'average_document_length': 82.76, 'max_document_length': 1026, 'unique_documents': 6939, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 697, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 697}}
MemotionT2IRetrieval (Sharma et al., 2020) ['eng'] Any2AnyRetrieval t2i [Encyclopaedic] {'test': 7685} {'test': {'number_of_characters': 58409, 'num_samples': 7685, 'num_queries': 697, 'num_documents': 6988, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 6988, 'min_query_length': 4, 'average_query_length': 83.8, 'max_query_length': 504, 'unique_queries': 697, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 697}}
MewsC16JaClustering ['jpn'] Clustering s2s [News, Written] None None
MindSmallReranking ['eng'] Reranking s2s [News, Written] None None
MintakaRetrieval ['ara', 'deu', 'fra', 'hin', 'ita', 'jpn', 'por', 'spa'] Retrieval s2p [Encyclopaedic, Written] None None
Moroco (Andrei M. Butnaru, 2019) ['ron'] Classification s2s [News, Written] None None
MovieReviewSentimentClassification (Théophile Blard, 2020) ['fra'] Classification s2s [Reviews, Written] None None
MrTidyRetrieval (Xinyu Zhang, 2021) ['ara', 'ben', 'eng', 'fin', 'ind', 'jpn', 'kor', 'rus', 'swa', 'tel', 'tha'] Retrieval s2p [Encyclopaedic, Written] None None
MultiEURLEXMultilabelClassification (Chalkidis et al., 2021) ['bul', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fin', 'fra', 'hrv', 'hun', 'ita', 'lav', 'lit', 'mlt', 'nld', 'pol', 'por', 'ron', 'slk', 'slv', 'spa', 'swe'] MultilabelClassification p2p [Government, Legal, Written] None None
MultiHateClassification ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'nld', 'pol', 'por', 'spa'] Classification s2s [Constructed, Written] None None
MultiLongDocRetrieval (Jianlv Chen, 2024) ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'por', 'rus', 'spa', 'tha'] Retrieval s2p [Encyclopaedic, Fiction, Non-fiction, Web, Written] None None
MultilingualSentiment ['cmn'] Classification s2s None None
MultilingualSentimentClassification ['ara', 'bam', 'bul', 'cmn', 'cym', 'deu', 'dza', 'ell', 'eng', 'eus', 'fas', 'fin', 'heb', 'hrv', 'ind', 'jpn', 'kor', 'mlt', 'nor', 'pol', 'rus', 'slk', 'spa', 'tha', 'tur', 'uig', 'urd', 'vie', 'zho'] Classification s2s [Reviews, Written] None None
MyanmarNews (A. H. Khine, 2017) ['mya'] Classification p2p [News, Written] None None
NFCorpus (Boteva et al., 2016) ['eng'] Retrieval s2p [Academic, Medical, Written] {'test': 3956} {'test': {'number_of_characters': 1612.55, 'num_samples': 3956, 'num_queries': 323, 'num_documents': 3633, 'average_document_length': 0.44, 'average_query_length': 0.07, 'average_relevant_docs_per_query': 38.19}}
NFCorpus-Fa ['fas'] Retrieval s2p [Medical] None None
NFCorpus-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Academic, Medical, Written] None None
NFCorpus-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p None None
NIGHTSI2IRetrieval (Fu et al., 2024) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 42158} {'test': {'number_of_characters': 0, 'num_samples': 42158, 'num_queries': 2120, 'num_documents': 40038, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 40038, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 2120, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2120}}
NLPJournalAbsIntroRetrieval ['jpn'] Retrieval s2s [Academic, Written] None None
NLPJournalTitleAbsRetrieval ['jpn'] Retrieval s2s [Academic, Written] None None
NLPJournalTitleIntroRetrieval ['jpn'] Retrieval s2s [Academic, Written] None None
NLPTwitterAnalysisClassification ['fas'] Classification s2p [Social] None None
NLPTwitterAnalysisClustering ['fas'] Clustering s2s [Social] None None
NQ (Tom Kwiatkowski, 2019) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
NQ-Fa ['fas'] Retrieval s2p [Encyclopaedic] None None
NQ-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Encyclopaedic, Written] None None
NQ-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p None None
NQ-PLHardNegatives (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p None None
NQHardNegatives (Tom Kwiatkowski, 2019) ['eng'] Retrieval s2p None None
NTREXBitextMining ['afr', 'amh', 'arb', 'aze', 'bak', 'bel', 'bem', 'ben', 'bod', 'bos', 'bul', 'cat', 'ces', 'ckb', 'cym', 'dan', 'deu', 'div', 'dzo', 'ell', 'eng', 'eus', 'ewe', 'fao', 'fas', 'fij', 'fil', 'fin', 'fra', 'fuc', 'gle', 'glg', 'guj', 'hau', 'heb', 'hin', 'hmn', 'hrv', 'hun', 'hye', 'ibo', 'ind', 'isl', 'ita', 'jpn', 'kan', 'kat', 'kaz', 'khm', 'kin', 'kir', 'kmr', 'kor', 'lao', 'lav', 'lit', 'ltz', 'mal', 'mar', 'mey', 'mkd', 'mlg', 'mlt', 'mon', 'mri', 'msa', 'mya', 'nde', 'nep', 'nld', 'nno', 'nob', 'nso', 'nya', 'orm', 'pan', 'pol', 'por', 'prs', 'pus', 'ron', 'rus', 'shi', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'spa', 'sqi', 'srp', 'ssw', 'swa', 'swe', 'tah', 'tam', 'tat', 'tel', 'tgk', 'tha', 'tir', 'ton', 'tsn', 'tuk', 'tur', 'uig', 'ukr', 'urd', 'uzb', 'ven', 'vie', 'wol', 'xho', 'yor', 'yue', 'zho', 'zul'] BitextMining s2s [News, Written] {'test': 3826252} {'test': {'num_samples': 3826252, 'number_of_characters': 988355274, 'unique_pairs': 3820263, 'min_sentence1_length': 1, 'average_sentence1_length': 129.15, 'max_sentence1_length': 773, 'unique_sentence1': 241259, 'min_sentence2_length': 1, 'average_sentence2_length': 129.15, 'max_sentence2_length': 773, 'unique_sentence2': 241259, 'hf_subset_descriptive_stats': {'afr_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'afr_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'afr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'afr_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'afr_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'afr_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'afr_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'afr_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'afr_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'afr_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.38, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'amh_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'amh_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'amh_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'amh_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'amh_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'amh_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'amh_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'amh_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'amh_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'amh_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'amh_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'amh_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'amh_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'amh_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 83.88, 'max_sentence1_length': 290, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'arb_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'arb_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'arb_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'arb_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'arb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'arb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'arb_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'arb_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'arb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'arb_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'arb_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'arb_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'arb_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'arb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'arb_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'arb_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'arb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'arb_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'arb_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'arb_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'arb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'arb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'arb_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'arb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'arb_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'arb_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'arb_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'arb_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'arb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'arb_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'arb_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'arb_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'arb_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 115.76, 'max_sentence1_length': 362, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'aze_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'aze_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'aze_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'aze_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'aze_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 515560, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'aze_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'aze_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'aze_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'aze_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 135.02, 'max_sentence1_length': 398, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bak_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515960, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'bak_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 494046, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bak_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'bak_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'bak_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'bak_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'bak_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'bak_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'bak_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 123.35, 'max_sentence1_length': 437, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'bel_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bel_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bel_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bel_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bel_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bel_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bel_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bel_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bel_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bel_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bel_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bel_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bel_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 128.24, 'max_sentence1_length': 422, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bem_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bem_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'bem_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'bem_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'bem_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'bem_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'bem_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'bem_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 149.47, 'max_sentence1_length': 465, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'ben_Beng-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474983, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ben_Beng-deu_Latn': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ben_Beng-div_Thaa': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'ben_Beng-ell_Grek': {'num_samples': 1997, 'number_of_characters': 542929, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ben_Beng-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ben_Beng-eus_Latn': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'ben_Beng-fas_Arab': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ben_Beng-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ben_Beng-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ben_Beng-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'ben_Beng-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ben_Beng-hin_Deva': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ben_Beng-hun_Latn': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ben_Beng-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ben_Beng-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ben_Beng-kan_Knda': {'num_samples': 1997, 'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'ben_Beng-kor_Hang': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ben_Beng-lit_Latn': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ben_Beng-mar_Deva': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'ben_Beng-nep_Deva': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ben_Beng-pan_Guru': {'num_samples': 1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'ben_Beng-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ben_Beng-por_Latn': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ben_Beng-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ben_Beng-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ben_Beng-snd_Arab': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'ben_Beng-spa_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ben_Beng-swa_Latn': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ben_Beng-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ben_Beng-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ben_Beng-tel_Telu': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'ben_Beng-tur_Latn': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ben_Beng-urd_Arab': {'num_samples': 1997, 'number_of_characters': 491800, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ben_Beng-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ben_Beng-zho_Hant': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ben_Beng-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 122.08, 'max_sentence1_length': 402, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'bod_Tibt-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'bod_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bod_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'bod_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'bod_Tibt-mon_Mong': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'bod_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'bod_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 150.54, 'max_sentence1_length': 478, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'bos_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 511000, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'bos_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'bos_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bos_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bos_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bos_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bos_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bos_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bos_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bos_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bos_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bos_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bos_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 127.65, 'max_sentence1_length': 434, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'bul_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 525979, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'bul_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 524799, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'bul_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'bul_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'bul_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'bul_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'bul_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'bul_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'bul_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'bul_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'bul_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'bul_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'bul_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 135.15, 'max_sentence1_length': 493, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'cat_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cat_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'cat_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'cat_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'cat_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'cat_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'cat_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'cat_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 141.69, 'max_sentence1_length': 460, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ces_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 497408, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ces_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 496228, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ces_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 511207, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ces_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ces_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ces_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ces_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ces_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ces_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 494287, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ces_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ces_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ces_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'ces_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 120.84, 'max_sentence1_length': 474, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ckb_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483548, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ckb_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ckb_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ckb_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ckb_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'ckb_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'ckb_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'ckb_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'ckb_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'ckb_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 126.37, 'max_sentence1_length': 399, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'cym_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'cym_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 561314, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.45, 'max_sentence1_length': 444, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'dan_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520490, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'dan_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'dan_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dan_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'dan_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'dan_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'dan_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'dan_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'dan_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'dan_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 126.26, 'max_sentence1_length': 522, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 564002, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'deu_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 526831, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'deu_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 539452, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'deu_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 547788, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'deu_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'deu_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'deu_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'deu_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'deu_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'deu_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'deu_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'deu_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'deu_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 574026, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'deu_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'deu_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'deu_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'deu_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'deu_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'deu_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'deu_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'deu_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'deu_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'deu_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'deu_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'deu_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'deu_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'deu_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'deu_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'deu_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'deu_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'deu_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'deu_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'deu_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 148.05, 'max_sentence1_length': 508, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'div_Thaa-ben_Beng': {'num_samples': 1997, 'number_of_characters': 547650, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'div_Thaa-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'div_Thaa-eus_Latn': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'div_Thaa-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'div_Thaa-hin_Deva': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'div_Thaa-kan_Knda': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'div_Thaa-mar_Deva': {'num_samples': 1997, 'number_of_characters': 564735, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'div_Thaa-nep_Deva': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-pan_Guru': {'num_samples': 1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'div_Thaa-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'div_Thaa-snd_Arab': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'div_Thaa-tam_Taml': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'div_Thaa-tel_Telu': {'num_samples': 1997, 'number_of_characters': 551375, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'div_Thaa-urd_Arab': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 152.15, 'max_sentence1_length': 609, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'dzo_Tibt-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 543850, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'dzo_Tibt-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'dzo_Tibt-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'dzo_Tibt-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'dzo_Tibt-mon_Mong': {'num_samples': 1997, 'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'dzo_Tibt-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'dzo_Tibt-tha_Thai': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 121.79, 'max_sentence1_length': 411, 'unique_sentence1': 1992, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'ell_Grek-arb_Arab': {'num_samples': 1997, 'number_of_characters': 530308, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ell_Grek-ben_Beng': {'num_samples': 1997, 'number_of_characters': 542929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ell_Grek-deu_Latn': {'num_samples': 1997, 'number_of_characters': 594777, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ell_Grek-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ell_Grek-fas_Arab': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ell_Grek-fin_Latn': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ell_Grek-fra_Latn': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ell_Grek-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ell_Grek-hin_Deva': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ell_Grek-hun_Latn': {'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ell_Grek-hye_Armn': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'ell_Grek-ind_Latn': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ell_Grek-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ell_Grek-kat_Geor': {'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'ell_Grek-kor_Hang': {'num_samples': 1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ell_Grek-lit_Latn': {'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ell_Grek-nld_Latn': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ell_Grek-pol_Latn': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ell_Grek-por_Latn': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ell_Grek-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ell_Grek-spa_Latn': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ell_Grek-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ell_Grek-swa_Latn': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ell_Grek-swe_Latn': {'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ell_Grek-tam_Taml': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ell_Grek-tur_Latn': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ell_Grek-vie_Latn': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ell_Grek-zho_Hant': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ell_Grek-zul_Latn': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 149.79, 'max_sentence1_length': 584, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eng_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516072, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 415227, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'eng_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 478901, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'eng_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 517354, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'eng_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 494046, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'eng_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 503810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'eng_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 546212, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'eng_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491522, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eng_Latn-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 548349, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'eng_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 502630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'eng_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 517609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'eng_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 530680, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'eng_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 489038, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'eng_Latn-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 500087, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'eng_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 514225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'eng_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 499858, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'eng_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 543370, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'eng_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551568, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eng_Latn-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 490941, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'eng_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 546847, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'eng_Latn-eus_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'eng_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'eng_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'eng_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'eng_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'eng_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'eng_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'eng_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'eng_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'eng_Latn-gle_Latn': {'num_samples': 1997, 'number_of_characters': 542529, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 147.63, 'max_sentence2_length': 461, 'unique_sentence2': 1997}, 'eng_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eng_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'eng_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'eng_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eng_Latn-hmn_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 165.64, 'max_sentence2_length': 643, 'unique_sentence2': 1997}, 'eng_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'eng_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 493821, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'eng_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'eng_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'eng_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'eng_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 359346, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'eng_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eng_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'eng_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'eng_Latn-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'eng_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'eng_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 498584, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'eng_Latn-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'eng_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'eng_Latn-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'eng_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'eng_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'eng_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'eng_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'eng_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eng_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'eng_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 515611, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'eng_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'eng_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-mon_Mong': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'eng_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'eng_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'eng_Latn-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'eng_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'eng_Latn-nep_Deva': {'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'eng_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'eng_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'eng_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'eng_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'eng_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'eng_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'eng_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'eng_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'eng_Latn-pus_Arab': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'eng_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'eng_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'eng_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'eng_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eng_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'eng_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'eng_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'eng_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'eng_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eng_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'eng_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'eng_Latn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'eng_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'eng_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'eng_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'eng_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'eng_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'eng_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'eng_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eng_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'eng_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495247, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eng_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'eng_Latn-tha_Thai': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'eng_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'eng_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'eng_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'eng_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 532994, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'eng_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'eng_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'eng_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'eng_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'eng_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'eng_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'eng_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'eng_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'eng_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'eng_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'eng_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'eng_Latn-zho_Hans': {'num_samples': 1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'eng_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'eng_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.05, 'max_sentence1_length': 437, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'eus_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 519005, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'eus_Latn-div_Thaa': {'num_samples': 1997, 'number_of_characters': 579051, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'eus_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522923, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'eus_Latn-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'eus_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'eus_Latn-kan_Knda': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'eus_Latn-mar_Deva': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'eus_Latn-nep_Deva': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-pan_Guru': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'eus_Latn-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'eus_Latn-snd_Arab': {'num_samples': 1997, 'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'eus_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'eus_Latn-tel_Telu': {'num_samples': 1997, 'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'eus_Latn-urd_Arab': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 137.81, 'max_sentence1_length': 393, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'ewe_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 537470, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'ewe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 486698, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ewe_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ewe_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ewe_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ewe_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'ewe_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'ewe_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 119.67, 'max_sentence1_length': 493, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'fao_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 526155, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fao_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 509941, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'fao_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553453, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fao_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505523, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fao_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'fao_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'fao_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fao_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'fao_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'fao_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.1, 'max_sentence1_length': 433, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 474520, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fas_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 487141, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fas_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495706, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'fas_Arab-deu_Latn': {'num_samples': 1997, 'number_of_characters': 538989, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fas_Arab-ell_Grek': {'num_samples': 1997, 'number_of_characters': 542466, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fas_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 491059, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fas_Arab-fin_Latn': {'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'fas_Arab-fra_Latn': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fas_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fas_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fas_Arab-hun_Latn': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fas_Arab-ind_Latn': {'num_samples': 1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fas_Arab-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fas_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'fas_Arab-kor_Hang': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fas_Arab-lit_Latn': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fas_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'fas_Arab-nld_Latn': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fas_Arab-pol_Latn': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fas_Arab-por_Latn': {'num_samples': 1997, 'number_of_characters': 520554, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fas_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'fas_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'fas_Arab-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fas_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'fas_Arab-spa_Latn': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fas_Arab-swa_Latn': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fas_Arab-swe_Latn': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fas_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fas_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 517486, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'fas_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fas_Arab-vie_Latn': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fas_Arab-zho_Hant': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fas_Arab-zul_Latn': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 121.85, 'max_sentence1_length': 389, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fij_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 548225, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fij_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'fij_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fij_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fij_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fij_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fij_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fij_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 578360, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fij_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fij_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.48, 'max_sentence1_length': 448, 'unique_sentence1': 1988, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fil_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541140, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fil_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 593925, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'fil_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fil_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'fil_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'fil_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'fil_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'fil_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'fil_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'fil_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 146.93, 'max_sentence1_length': 554, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'fin_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 500981, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fin_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 513602, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fin_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 565450, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fin_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 568927, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517520, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fin_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 513139, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fin_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'fin_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fin_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fin_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fin_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fin_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fin_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fin_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'fin_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fin_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fin_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fin_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fin_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fin_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fin_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fin_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fin_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fin_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fin_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fin_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fin_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.1, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fra_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 524289, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'fra_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 536910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'fra_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 576068, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'fra_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 588758, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'fra_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 592235, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'fra_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540828, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fra_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 536447, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'fra_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 562908, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'fra_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fra_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'fra_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'fra_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'fra_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'fra_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'fra_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'fra_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'fra_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'fra_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'fra_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'fra_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'fra_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'fra_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'fra_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'fra_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'fra_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'fra_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'fra_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'fra_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'fra_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 564229, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'fra_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'fra_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.77, 'max_sentence1_length': 512, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'fuc_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 526972, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'fuc_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 476200, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'fuc_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 467458, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'fuc_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'fuc_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'fuc_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'fuc_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'fuc_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 114.41, 'max_sentence1_length': 376, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'gle_Latn-cym_Latn': {'num_samples': 1997, 'number_of_characters': 561314, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.45, 'max_sentence2_length': 444, 'unique_sentence2': 1997}, 'gle_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 542529, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 147.63, 'max_sentence1_length': 461, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 554946, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'glg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 519706, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'glg_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565094, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'glg_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'glg_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'glg_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 549201, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'glg_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'glg_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 136.2, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'guj_Gujr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 488733, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'guj_Gujr-div_Thaa': {'num_samples': 1997, 'number_of_characters': 548779, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'guj_Gujr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 492651, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'guj_Gujr-eus_Latn': {'num_samples': 1997, 'number_of_characters': 520134, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'guj_Gujr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'guj_Gujr-kan_Knda': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'guj_Gujr-mar_Deva': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'guj_Gujr-nep_Deva': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-pan_Guru': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'guj_Gujr-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'guj_Gujr-snd_Arab': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'guj_Gujr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'guj_Gujr-tel_Telu': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'guj_Gujr-urd_Arab': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 122.65, 'max_sentence1_length': 378, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hau_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 437473, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'hau_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 517686, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hau_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'hau_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'hau_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'hau_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'hau_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'hau_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hau_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'hau_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'hau_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'hau_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'hau_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'hau_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 135.19, 'max_sentence1_length': 483, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'heb_Hebr-arb_Arab': {'num_samples': 1997, 'number_of_characters': 431477, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'heb_Hebr-ben_Beng': {'num_samples': 1997, 'number_of_characters': 444098, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'heb_Hebr-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 452663, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'heb_Hebr-deu_Latn': {'num_samples': 1997, 'number_of_characters': 495946, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'heb_Hebr-ell_Grek': {'num_samples': 1997, 'number_of_characters': 499423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'heb_Hebr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 448016, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'heb_Hebr-fas_Arab': {'num_samples': 1997, 'number_of_characters': 443635, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'heb_Hebr-fin_Latn': {'num_samples': 1997, 'number_of_characters': 470096, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'heb_Hebr-fra_Latn': {'num_samples': 1997, 'number_of_characters': 493404, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'heb_Hebr-hin_Deva': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'heb_Hebr-hun_Latn': {'num_samples': 1997, 'number_of_characters': 478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'heb_Hebr-ind_Latn': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'heb_Hebr-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'heb_Hebr-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'heb_Hebr-kor_Hang': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'heb_Hebr-lit_Latn': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'heb_Hebr-mey_Arab': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'heb_Hebr-nld_Latn': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'heb_Hebr-pol_Latn': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'heb_Hebr-por_Latn': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'heb_Hebr-prs_Arab': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'heb_Hebr-pus_Arab': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'heb_Hebr-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'heb_Hebr-shi_Arab': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'heb_Hebr-spa_Latn': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'heb_Hebr-swa_Latn': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'heb_Hebr-swe_Latn': {'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'heb_Hebr-tam_Taml': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'heb_Hebr-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'heb_Hebr-tur_Latn': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'heb_Hebr-vie_Latn': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'heb_Hebr-zho_Hant': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'heb_Hebr-zul_Latn': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 100.3, 'max_sentence1_length': 375, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hin_Deva-arb_Arab': {'num_samples': 1997, 'number_of_characters': 492756, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hin_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 505377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'hin_Deva-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557225, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hin_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 565423, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'hin_Deva-ell_Grek': {'num_samples': 1997, 'number_of_characters': 560702, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hin_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509295, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hin_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536778, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'hin_Deva-fas_Arab': {'num_samples': 1997, 'number_of_characters': 504914, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'hin_Deva-fin_Latn': {'num_samples': 1997, 'number_of_characters': 531375, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hin_Deva-fra_Latn': {'num_samples': 1997, 'number_of_characters': 554683, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hin_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 506506, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'hin_Deva-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 461871, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hin_Deva-hun_Latn': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'hin_Deva-ind_Latn': {'num_samples': 1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'hin_Deva-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hin_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'hin_Deva-kor_Hang': {'num_samples': 1997, 'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hin_Deva-lit_Latn': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hin_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'hin_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509798, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'hin_Deva-nld_Latn': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hin_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'hin_Deva-pol_Latn': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hin_Deva-por_Latn': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hin_Deva-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hin_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'hin_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'hin_Deva-spa_Latn': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hin_Deva-swa_Latn': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hin_Deva-swe_Latn': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hin_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hin_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'hin_Deva-tur_Latn': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hin_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'hin_Deva-vie_Latn': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hin_Deva-zho_Hant': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hin_Deva-zul_Latn': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 130.98, 'max_sentence1_length': 394, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hmn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 578510, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 165.64, 'max_sentence1_length': 643, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512015, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'hrv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 510835, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'hrv_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 525814, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'hrv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497243, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'hrv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503645, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hrv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'hrv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hrv_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hrv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'hrv_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hrv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'hrv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'hrv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 128.15, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'hun_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509557, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'hun_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 522178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'hun_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 574026, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'hun_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 577503, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hun_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 526096, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hun_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521715, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'hun_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 548176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'hun_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 571484, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'hun_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 478672, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'hun_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539951, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'hun_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'hun_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'hun_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'hun_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'hun_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'hun_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'hun_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'hun_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'hun_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'hun_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'hun_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'hun_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'hun_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'hun_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'hun_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'hun_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'hun_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 139.4, 'max_sentence1_length': 508, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'hye_Armn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 563842, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'hye_Armn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 512435, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'hye_Armn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'hye_Armn-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 132.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'ibo_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 413608, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ibo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ibo_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 516067, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ibo_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ibo_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 483532, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ibo_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ibo_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'ibo_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 518610, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ibo_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ibo_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ibo_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ibo_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ibo_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ibo_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 123.24, 'max_sentence1_length': 469, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ind_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 518153, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'ind_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 530774, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'ind_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 582622, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ind_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 586099, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'ind_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 534692, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ind_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 530311, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'ind_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 587477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ind_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 580392, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ind_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 556772, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ind_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 580080, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ind_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 487268, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'ind_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 548547, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'ind_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 565348, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ind_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'ind_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'ind_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'ind_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ind_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ind_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ind_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ind_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ind_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ind_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ind_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ind_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ind_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'ind_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ind_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ind_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'ind_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'ind_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'ind_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'ind_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ind_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'ind_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 143.7, 'max_sentence1_length': 486, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'isl_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 530560, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'isl_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 514346, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'isl_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 557858, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'isl_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 509928, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'isl_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 520011, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'isl_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'isl_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'isl_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'isl_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'isl_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 131.3, 'max_sentence1_length': 399, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'ita_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 572177, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ita_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536937, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ita_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 582325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ita_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 561203, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ita_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ita_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ita_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'ita_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 144.83, 'max_sentence1_length': 623, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-arb_Arab': {'num_samples': 1997, 'number_of_characters': 342807, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'jpn_Jpan-ben_Beng': {'num_samples': 1997, 'number_of_characters': 355428, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'jpn_Jpan-deu_Latn': {'num_samples': 1997, 'number_of_characters': 407276, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'jpn_Jpan-ell_Grek': {'num_samples': 1997, 'number_of_characters': 410753, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'jpn_Jpan-eng_Latn': {'num_samples': 1997, 'number_of_characters': 359346, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'jpn_Jpan-fas_Arab': {'num_samples': 1997, 'number_of_characters': 354965, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'jpn_Jpan-fin_Latn': {'num_samples': 1997, 'number_of_characters': 381426, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'jpn_Jpan-fra_Latn': {'num_samples': 1997, 'number_of_characters': 404734, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'jpn_Jpan-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 311922, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'jpn_Jpan-hin_Deva': {'num_samples': 1997, 'number_of_characters': 373201, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'jpn_Jpan-hun_Latn': {'num_samples': 1997, 'number_of_characters': 390002, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'jpn_Jpan-ind_Latn': {'num_samples': 1997, 'number_of_characters': 398598, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'jpn_Jpan-kor_Hang': {'num_samples': 1997, 'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'jpn_Jpan-lit_Latn': {'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'jpn_Jpan-nld_Latn': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'jpn_Jpan-pol_Latn': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'jpn_Jpan-por_Latn': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'jpn_Jpan-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'jpn_Jpan-spa_Latn': {'num_samples': 1997, 'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'jpn_Jpan-swa_Latn': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'jpn_Jpan-swe_Latn': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'jpn_Jpan-tam_Taml': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'jpn_Jpan-tur_Latn': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'jpn_Jpan-vie_Latn': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'jpn_Jpan-yue_Hant': {'num_samples': 1997, 'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'jpn_Jpan-zho_Hans': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'jpn_Jpan-zho_Hant': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'jpn_Jpan-zul_Latn': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 55.9, 'max_sentence1_length': 189, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'kan_Knda-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509338, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kan_Knda-div_Thaa': {'num_samples': 1997, 'number_of_characters': 569384, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'kan_Knda-eng_Latn': {'num_samples': 1997, 'number_of_characters': 513256, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kan_Knda-eus_Latn': {'num_samples': 1997, 'number_of_characters': 540739, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'kan_Knda-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 510467, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'kan_Knda-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527111, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kan_Knda-mar_Deva': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'kan_Knda-nep_Deva': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-pan_Guru': {'num_samples': 1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'kan_Knda-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kan_Knda-snd_Arab': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'kan_Knda-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kan_Knda-tel_Telu': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'kan_Knda-urd_Arab': {'num_samples': 1997, 'number_of_characters': 513534, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 132.97, 'max_sentence1_length': 449, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'kat_Geor-ell_Grek': {'num_samples': 1997, 'number_of_characters': 565719, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kat_Geor-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514312, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kat_Geor-hye_Armn': {'num_samples': 1997, 'number_of_characters': 531307, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'kat_Geor-sqi_Latn': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 133.5, 'max_sentence1_length': 503, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 142.02, 'max_sentence2_length': 461, 'unique_sentence2': 1996}, 'kaz_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 529910, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kaz_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 506602, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'kaz_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507996, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kaz_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'kaz_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kaz_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kaz_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kaz_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'kaz_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 130.33, 'max_sentence1_length': 473, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'khm_Khmr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 589120, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'khm_Khmr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 531712, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'khm_Khmr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 536211, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'khm_Khmr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'khm_Khmr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'khm_Khmr-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'khm_Khmr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 144.46, 'max_sentence1_length': 517, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'kin_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 602279, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'kin_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551507, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kin_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 542765, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'kin_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 532267, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'kin_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'kin_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'kin_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'kin_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 152.12, 'max_sentence1_length': 541, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'kir_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 520498, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'kir_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 497190, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'kir_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498584, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kir_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 511140, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'kir_Cyrl-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 496790, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kir_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'kir_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kir_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'kir_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.62, 'max_sentence1_length': 395, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'kmr_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 477127, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kmr_Latn-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 498313, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'kmr_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493666, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kmr_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 489285, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kmr_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 446242, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kmr_Latn-mey_Arab': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'kmr_Latn-prs_Arab': {'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'kmr_Latn-pus_Arab': {'num_samples': 1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'kmr_Latn-shi_Arab': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'kmr_Latn-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.16, 'max_sentence1_length': 420, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'kor_Hang-arb_Arab': {'num_samples': 1997, 'number_of_characters': 364586, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'kor_Hang-ben_Beng': {'num_samples': 1997, 'number_of_characters': 377207, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'kor_Hang-deu_Latn': {'num_samples': 1997, 'number_of_characters': 429055, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'kor_Hang-ell_Grek': {'num_samples': 1997, 'number_of_characters': 432532, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'kor_Hang-eng_Latn': {'num_samples': 1997, 'number_of_characters': 381125, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'kor_Hang-fas_Arab': {'num_samples': 1997, 'number_of_characters': 376744, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'kor_Hang-fin_Latn': {'num_samples': 1997, 'number_of_characters': 403205, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'kor_Hang-fra_Latn': {'num_samples': 1997, 'number_of_characters': 426513, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'kor_Hang-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 333701, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'kor_Hang-hin_Deva': {'num_samples': 1997, 'number_of_characters': 394980, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'kor_Hang-hun_Latn': {'num_samples': 1997, 'number_of_characters': 411781, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'kor_Hang-ind_Latn': {'num_samples': 1997, 'number_of_characters': 420377, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'kor_Hang-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 245031, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'kor_Hang-lit_Latn': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'kor_Hang-nld_Latn': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'kor_Hang-pol_Latn': {'num_samples': 1997, 'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'kor_Hang-por_Latn': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'kor_Hang-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'kor_Hang-spa_Latn': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'kor_Hang-swa_Latn': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'kor_Hang-swe_Latn': {'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'kor_Hang-tam_Taml': {'num_samples': 1997, 'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'kor_Hang-tur_Latn': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'kor_Hang-vie_Latn': {'num_samples': 1997, 'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'kor_Hang-yue_Hant': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'kor_Hang-zho_Hans': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'kor_Hang-zho_Hant': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'kor_Hang-zul_Latn': {'num_samples': 1997, 'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 66.8, 'max_sentence1_length': 217, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'lao_Laoo-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 567609, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'lao_Laoo-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 510201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'lao_Laoo-eng_Latn': {'num_samples': 1997, 'number_of_characters': 514700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lao_Laoo-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 555471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'lao_Laoo-mon_Mong': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'lao_Laoo-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'lao_Laoo-tha_Thai': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 133.69, 'max_sentence1_length': 507, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'lav_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515908, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lav_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 537988, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lav_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 546564, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lav_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 7, 'average_sentence1_length': 134.3, 'max_sentence1_length': 503, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'lit_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 490578, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'lit_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 503199, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'lit_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 555047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'lit_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 558524, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'lit_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 507117, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'lit_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 502736, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'lit_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 529197, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'lit_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 552505, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'lit_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 459693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'lit_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520972, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'lit_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 537773, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'lit_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 546369, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'lit_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 371023, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'lit_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 392802, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'lit_Latn-lav_Latn': {'num_samples': 1997, 'number_of_characters': 527585, 'unique_pairs': 1995, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 134.3, 'max_sentence2_length': 503, 'unique_sentence2': 1994}, 'lit_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'lit_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'lit_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'lit_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'lit_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'lit_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'lit_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'lit_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'lit_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'lit_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'lit_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'lit_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 129.89, 'max_sentence1_length': 446, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ltz_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 549109, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'ltz_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 532895, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'ltz_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 576407, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'ltz_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 528477, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ltz_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 538560, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'ltz_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 542965, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'ltz_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'ltz_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'ltz_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'ltz_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 140.59, 'max_sentence1_length': 543, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'mal_Mlym-eng_Latn': {'num_samples': 1997, 'number_of_characters': 551872, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mal_Mlym-fij_Latn': {'num_samples': 1997, 'number_of_characters': 604657, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mal_Mlym-fil_Latn': {'num_samples': 1997, 'number_of_characters': 597572, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mal_Mlym-ind_Latn': {'num_samples': 1997, 'number_of_characters': 591124, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mal_Mlym-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'mal_Mlym-mri_Latn': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mal_Mlym-msa_Latn': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mal_Mlym-smo_Latn': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mal_Mlym-tah_Latn': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mal_Mlym-ton_Latn': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 152.3, 'max_sentence1_length': 540, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mar_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 504689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'mar_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 564735, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'mar_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 508607, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mar_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 536090, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'mar_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 505818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'mar_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 522462, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'mar_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 526423, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'mar_Deva-nep_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'mar_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'mar_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'mar_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'mar_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'mar_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 130.64, 'max_sentence1_length': 443, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'mey_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 445016, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'mey_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 466202, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'mey_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 461555, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mey_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 457174, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'mey_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 414131, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'mey_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 459781, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'mey_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'mey_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'mey_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'mey_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 107.08, 'max_sentence1_length': 392, 'unique_sentence1': 1993, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'mkd_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 523981, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'mkd_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 522801, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'mkd_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 537780, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'mkd_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 509209, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'mkd_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515611, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mkd_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 523816, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'mkd_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mkd_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'mkd_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'mkd_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'mkd_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'mkd_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'mkd_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.15, 'max_sentence1_length': 451, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'mlg_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 568028, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlg_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 620813, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mlg_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 613728, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mlg_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 607280, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mlg_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 624460, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mlg_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'mlg_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mlg_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mlg_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mlg_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 160.39, 'max_sentence1_length': 559, 'unique_sentence1': 1994, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mlt_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560435, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'mlt_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525195, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mlt_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570583, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'mlt_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549461, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'mlt_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566692, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'mlt_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 554690, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'mlt_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'mlt_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 138.95, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'mon_Mong-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 559677, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mon_Mong-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 502269, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mon_Mong-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506768, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mon_Mong-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 547539, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mon_Mong-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 526028, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mon_Mong-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'mon_Mong-tha_Thai': {'num_samples': 1997, 'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 129.72, 'max_sentence1_length': 414, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'mri_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521844, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mri_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 574629, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'mri_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 567544, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'mri_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561096, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'mri_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 578276, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'mri_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 594432, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'mri_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'mri_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'mri_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'mri_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 137.27, 'max_sentence1_length': 443, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'msa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524903, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'msa_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 577688, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'msa_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 570603, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'msa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564155, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'msa_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 581335, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'msa_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 597491, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'msa_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551307, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'msa_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'msa_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'msa_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 138.8, 'max_sentence1_length': 463, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'mya_Mymr-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 612483, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'mya_Mymr-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 555075, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'mya_Mymr-eng_Latn': {'num_samples': 1997, 'number_of_characters': 559574, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'mya_Mymr-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 600345, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'mya_Mymr-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 578834, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'mya_Mymr-mon_Mong': {'num_samples': 1997, 'number_of_characters': 570902, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'mya_Mymr-tha_Thai': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 156.16, 'max_sentence1_length': 773, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 118.91, 'max_sentence2_length': 439, 'unique_sentence2': 1996}, 'nde_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596231, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nde_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 545459, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nde_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 536717, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nde_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526219, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nde_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 601526, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nde_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'nde_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nde_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 597495, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.09, 'max_sentence1_length': 590, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'nep_Deva-ben_Beng': {'num_samples': 1997, 'number_of_characters': 492025, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nep_Deva-div_Thaa': {'num_samples': 1997, 'number_of_characters': 552071, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'nep_Deva-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495943, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nep_Deva-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523426, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'nep_Deva-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 493154, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'nep_Deva-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509798, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nep_Deva-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513759, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'nep_Deva-mar_Deva': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'nep_Deva-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'nep_Deva-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'nep_Deva-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'nep_Deva-tam_Taml': {'num_samples': 1997, 'number_of_characters': 558184, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nep_Deva-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'nep_Deva-urd_Arab': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 124.3, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'nld_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 560267, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 523096, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'nld_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 535717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'nld_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 544053, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nld_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 587565, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nld_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 591042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'nld_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539635, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nld_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 549718, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nld_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 535254, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'nld_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 561715, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'nld_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585023, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'nld_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 492211, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'nld_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 553490, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'nld_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 570291, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'nld_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 578887, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'nld_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 554123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nld_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 403541, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'nld_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 425320, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'nld_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 551312, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'nld_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 572672, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nld_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nld_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nld_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'nld_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'nld_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'nld_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'nld_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nld_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nld_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'nld_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'nld_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nld_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'nld_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 146.18, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'nno_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 516709, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nno_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 500495, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nno_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 544007, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nno_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 496077, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nno_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 506160, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nno_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 510565, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nno_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 529114, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nno_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 540272, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'nno_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'nno_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.37, 'max_sentence1_length': 417, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nob_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 519796, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'nob_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503582, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'nob_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547094, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'nob_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499164, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nob_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509247, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'nob_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 513652, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'nob_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532201, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'nob_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543359, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'nob_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 499801, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'nob_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 125.91, 'max_sentence1_length': 482, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'nso_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 459006, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'nso_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539219, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nso_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561465, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'nso_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537600, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'nso_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'nso_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'nso_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'nso_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'nso_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'nso_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'nso_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'nso_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'nso_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'nso_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 145.97, 'max_sentence1_length': 487, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'nya_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 582774, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'nya_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'nya_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 523260, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'nya_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 512762, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'nya_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 588069, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'nya_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 582021, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'nya_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'nya_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 142.35, 'max_sentence1_length': 464, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'orm_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 404938, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'orm_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485151, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'orm_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 507397, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'orm_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 483532, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'orm_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 528930, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'orm_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'orm_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'orm_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'orm_Ethi-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'orm_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'orm_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'orm_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'orm_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'orm_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 118.89, 'max_sentence1_length': 466, 'unique_sentence1': 1984, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'pan_Guru-ben_Beng': {'num_samples': 1997, 'number_of_characters': 494224, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pan_Guru-div_Thaa': {'num_samples': 1997, 'number_of_characters': 554270, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'pan_Guru-eng_Latn': {'num_samples': 1997, 'number_of_characters': 498142, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pan_Guru-eus_Latn': {'num_samples': 1997, 'number_of_characters': 525625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'pan_Guru-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 495353, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'pan_Guru-hin_Deva': {'num_samples': 1997, 'number_of_characters': 511997, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pan_Guru-kan_Knda': {'num_samples': 1997, 'number_of_characters': 515958, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'pan_Guru-mar_Deva': {'num_samples': 1997, 'number_of_characters': 511309, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'pan_Guru-nep_Deva': {'num_samples': 1997, 'number_of_characters': 498645, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'pan_Guru-snd_Arab': {'num_samples': 1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'pan_Guru-tam_Taml': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pan_Guru-tel_Telu': {'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'pan_Guru-urd_Arab': {'num_samples': 1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 125.4, 'max_sentence1_length': 383, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'pol_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 509047, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'pol_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 533956, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'pol_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521668, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'pol_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 532776, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'pol_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 547755, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'pol_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 519184, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'pol_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 573516, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'pol_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576993, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'pol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525586, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pol_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 521205, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pol_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547666, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'pol_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570974, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'pol_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 478162, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pol_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 539441, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'pol_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 533791, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 556242, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'pol_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564838, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'pol_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 389492, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'pol_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 411271, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'pol_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 537263, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'pol_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 545757, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'pol_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569781, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'pol_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'pol_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'pol_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'pol_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'pol_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'pol_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'pol_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'pol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'pol_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'pol_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'pol_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'pol_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'pol_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'pol_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'pol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 139.14, 'max_sentence1_length': 468, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'por_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 508396, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'por_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 521017, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'por_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 560175, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'por_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 572865, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'por_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 576342, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'por_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 524935, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'por_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 520554, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'por_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 547015, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'por_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 570323, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'por_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 549201, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 477511, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'por_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 538790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'por_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 555591, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'por_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564187, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'por_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 566432, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'por_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 388841, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'por_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 410620, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'por_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 536612, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'por_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 554690, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'por_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 569130, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'por_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 555081, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'por_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'por_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'por_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'por_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'por_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'por_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'por_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'por_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'por_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'por_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 138.82, 'max_sentence1_length': 497, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'prs_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'prs_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 494903, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'prs_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490256, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'prs_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485875, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'prs_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442832, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'prs_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 488482, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'prs_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456371, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'prs_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'prs_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'prs_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.45, 'max_sentence1_length': 365, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'pus_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 473814, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'pus_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 495000, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'pus_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 490353, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'pus_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 485972, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'pus_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 442929, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'pus_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 488579, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'pus_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 456468, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'pus_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 485169, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'pus_Arab-shi_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'pus_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 121.5, 'max_sentence1_length': 366, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'ron_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 575445, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'ron_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 540205, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ron_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 585593, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'ron_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 564471, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ron_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 581702, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'ron_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 569960, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ron_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 569700, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'ron_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 146.46, 'max_sentence1_length': 518, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 506074, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'rus_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 530983, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'rus_Cyrl-ben_Beng': {'num_samples': 1997, 'number_of_characters': 518695, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'rus_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 529803, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'rus_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 544782, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'rus_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 516211, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'rus_Cyrl-deu_Latn': {'num_samples': 1997, 'number_of_characters': 570543, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'rus_Cyrl-ell_Grek': {'num_samples': 1997, 'number_of_characters': 574020, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'rus_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 522613, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'rus_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 518232, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'rus_Cyrl-fin_Latn': {'num_samples': 1997, 'number_of_characters': 544693, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-fra_Latn': {'num_samples': 1997, 'number_of_characters': 568001, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'rus_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 475189, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'rus_Cyrl-hin_Deva': {'num_samples': 1997, 'number_of_characters': 536468, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'rus_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 530818, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-hun_Latn': {'num_samples': 1997, 'number_of_characters': 553269, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'rus_Cyrl-ind_Latn': {'num_samples': 1997, 'number_of_characters': 561865, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'rus_Cyrl-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 386519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'rus_Cyrl-kor_Hang': {'num_samples': 1997, 'number_of_characters': 408298, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'rus_Cyrl-lit_Latn': {'num_samples': 1997, 'number_of_characters': 534290, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'rus_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 542784, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'rus_Cyrl-nld_Latn': {'num_samples': 1997, 'number_of_characters': 566808, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'rus_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 552759, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'rus_Cyrl-por_Latn': {'num_samples': 1997, 'number_of_characters': 552108, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'rus_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'rus_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'rus_Cyrl-spa_Latn': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'rus_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'rus_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'rus_Cyrl-swa_Latn': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'rus_Cyrl-swe_Latn': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'rus_Cyrl-tam_Taml': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'rus_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'rus_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'rus_Cyrl-vie_Latn': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'rus_Cyrl-zho_Hant': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'rus_Cyrl-zul_Latn': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 137.65, 'max_sentence1_length': 419, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'shi_Arab-arb_Arab': {'num_samples': 1997, 'number_of_characters': 446094, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'shi_Arab-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 467280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'shi_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 462633, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'shi_Arab-fas_Arab': {'num_samples': 1997, 'number_of_characters': 458252, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'shi_Arab-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 415209, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'shi_Arab-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 460859, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'shi_Arab-mey_Arab': {'num_samples': 1997, 'number_of_characters': 428748, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'shi_Arab-prs_Arab': {'num_samples': 1997, 'number_of_characters': 457449, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'shi_Arab-pus_Arab': {'num_samples': 1997, 'number_of_characters': 457546, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'shi_Arab-tgk_Cyrl': {'num_samples': 1997, 'number_of_characters': 489060, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 107.62, 'max_sentence1_length': 378, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 137.28, 'max_sentence2_length': 451, 'unique_sentence2': 1995}, 'sin_Sinh-ben_Beng': {'num_samples': 1997, 'number_of_characters': 502543, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'sin_Sinh-div_Thaa': {'num_samples': 1997, 'number_of_characters': 562589, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'sin_Sinh-eng_Latn': {'num_samples': 1997, 'number_of_characters': 506461, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sin_Sinh-eus_Latn': {'num_samples': 1997, 'number_of_characters': 533944, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'sin_Sinh-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 503672, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'sin_Sinh-hin_Deva': {'num_samples': 1997, 'number_of_characters': 520316, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'sin_Sinh-kan_Knda': {'num_samples': 1997, 'number_of_characters': 524277, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'sin_Sinh-mar_Deva': {'num_samples': 1997, 'number_of_characters': 519628, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'sin_Sinh-nep_Deva': {'num_samples': 1997, 'number_of_characters': 506964, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-pan_Guru': {'num_samples': 1997, 'number_of_characters': 509163, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'sin_Sinh-snd_Arab': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'sin_Sinh-tam_Taml': {'num_samples': 1997, 'number_of_characters': 568702, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'sin_Sinh-tel_Telu': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'sin_Sinh-urd_Arab': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 129.56, 'max_sentence1_length': 441, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'slk_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 509059, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slk_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507879, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slk_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522858, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slk_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 494287, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500689, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slk_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508894, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slk_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520860, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slk_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530835, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slk_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527862, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slk_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'slk_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slk_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slk_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 126.67, 'max_sentence1_length': 403, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'slv_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508986, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'slv_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507806, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'slv_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522785, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'slv_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 494214, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'slv_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500616, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'slv_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508821, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'slv_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520787, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'slv_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530762, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'slv_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527789, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'slv_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505865, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'slv_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'slv_Latn-srp_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'slv_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.64, 'max_sentence1_length': 463, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'smo_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 525575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'smo_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 578360, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'smo_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 571275, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'smo_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 564827, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'smo_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 582007, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'smo_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 598163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'smo_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 551979, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'smo_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 555038, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'smo_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'smo_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 591495, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 139.14, 'max_sentence1_length': 431, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'sna_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 596822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'sna_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 546050, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sna_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 537308, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'sna_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 526810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'sna_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 602117, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'sna_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 596069, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'sna_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 582612, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'sna_Latn-ven_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 6, 'average_sentence1_length': 149.39, 'max_sentence1_length': 511, 'unique_sentence1': 1995, 'min_sentence2_length': 10, 'average_sentence2_length': 150.1, 'max_sentence2_length': 535, 'unique_sentence2': 1993}, 'snd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 464129, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'snd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 524175, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'snd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 468047, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'snd_Arab-eus_Latn': {'num_samples': 1997, 'number_of_characters': 495530, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'snd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 465258, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'snd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 481902, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'snd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 485863, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'snd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 481214, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'snd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 468550, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-pan_Guru': {'num_samples': 1997, 'number_of_characters': 470749, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'snd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 479068, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'snd_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'snd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'snd_Arab-urd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 110.33, 'max_sentence1_length': 335, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'som_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 458799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'som_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 539012, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'som_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 561258, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'som_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 537393, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'som_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 582791, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'som_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 528723, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'som_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'som_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'som_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'som_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'som_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'som_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'som_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'som_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 145.86, 'max_sentence1_length': 455, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'spa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 519381, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'spa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 532002, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'spa_Latn-cat_Latn': {'num_samples': 1997, 'number_of_characters': 571160, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 141.69, 'max_sentence2_length': 460, 'unique_sentence2': 1997}, 'spa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 583850, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'spa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 587327, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'spa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535920, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'spa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 531539, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'spa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 558000, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'spa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 581308, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'spa_Latn-glg_Latn': {'num_samples': 1997, 'number_of_characters': 560186, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 136.2, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 488496, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'spa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 549775, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'spa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 566576, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'spa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 575172, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'spa_Latn-ita_Latn': {'num_samples': 1997, 'number_of_characters': 577417, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 144.83, 'max_sentence2_length': 623, 'unique_sentence2': 1996}, 'spa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 399826, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'spa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 421605, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'spa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 547597, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'spa_Latn-mlt_Latn': {'num_samples': 1997, 'number_of_characters': 565675, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 138.95, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'spa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 580115, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'spa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 566066, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'spa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 565415, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'spa_Latn-ron_Latn': {'num_samples': 1997, 'number_of_characters': 580685, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 146.46, 'max_sentence2_length': 518, 'unique_sentence2': 1997}, 'spa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 563093, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'spa_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'spa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'spa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'spa_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'spa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'spa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'spa_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 1, 'average_sentence1_length': 144.32, 'max_sentence1_length': 504, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'sqi_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 582734, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'sqi_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 531327, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'sqi_Latn-hye_Armn': {'num_samples': 1997, 'number_of_characters': 548322, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 132.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'sqi_Latn-kat_Geor': {'num_samples': 1997, 'number_of_characters': 550199, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 142.02, 'max_sentence1_length': 461, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 133.5, 'max_sentence2_length': 503, 'unique_sentence2': 1995}, 'srp_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 508393, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 507213, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 522192, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 493621, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 500023, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 508228, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 520194, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 530169, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 527196, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 505272, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 505199, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'srp_Cyrl-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 126.34, 'max_sentence1_length': 439, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'srp_Latn-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 512231, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'srp_Latn-bos_Latn': {'num_samples': 1997, 'number_of_characters': 511051, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'srp_Latn-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 526030, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'srp_Latn-ces_Latn': {'num_samples': 1997, 'number_of_characters': 497459, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'srp_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 503861, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'srp_Latn-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 512066, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'srp_Latn-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 524032, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'srp_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 534007, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'srp_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 531034, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'srp_Latn-slk_Latn': {'num_samples': 1997, 'number_of_characters': 509110, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'srp_Latn-slv_Latn': {'num_samples': 1997, 'number_of_characters': 509037, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'srp_Latn-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 508444, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'srp_Latn-ukr_Cyrl': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 128.26, 'max_sentence1_length': 452, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 131.59, 'max_sentence2_length': 440, 'unique_sentence2': 1996}, 'ssw_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 455649, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'ssw_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 535862, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ssw_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 558108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'ssw_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 534243, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'ssw_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 579641, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'ssw_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 525573, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'ssw_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 579434, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'ssw_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 560651, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'ssw_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'ssw_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'ssw_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'ssw_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'ssw_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'ssw_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 144.29, 'max_sentence1_length': 510, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swa_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 440016, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'swa_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 503690, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swa_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 516311, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swa_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 568159, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swa_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 571636, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swa_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 520229, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swa_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 515848, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swa_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 542309, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swa_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 565617, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swa_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 542475, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'swa_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 472805, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'swa_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 534084, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swa_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 550885, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swa_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 518610, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'swa_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 559481, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swa_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 384135, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swa_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 405914, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'swa_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 531906, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swa_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 564424, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swa_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 564008, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'swa_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 509940, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'swa_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 550375, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swa_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 549724, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'swa_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 547402, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swa_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 563801, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'swa_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 560709, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swa_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 560651, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'swa_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'swa_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swa_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'swa_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'swa_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swa_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swa_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'swa_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'swa_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'swa_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swa_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 136.46, 'max_sentence1_length': 430, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'swe_Latn-afr_Latn': {'num_samples': 1997, 'number_of_characters': 520179, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 134.38, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 483008, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'swe_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 495629, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'swe_Latn-dan_Latn': {'num_samples': 1997, 'number_of_characters': 503965, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 126.26, 'max_sentence2_length': 522, 'unique_sentence2': 1995}, 'swe_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 547477, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'swe_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 550954, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'swe_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 499547, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'swe_Latn-fao_Latn': {'num_samples': 1997, 'number_of_characters': 509630, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.1, 'max_sentence2_length': 433, 'unique_sentence2': 1997}, 'swe_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 495166, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'swe_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 521627, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'swe_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 544935, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'swe_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 452123, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'swe_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 513402, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'swe_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 530203, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'swe_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 538799, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'swe_Latn-isl_Latn': {'num_samples': 1997, 'number_of_characters': 514035, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 131.3, 'max_sentence2_length': 399, 'unique_sentence2': 1996}, 'swe_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 363453, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'swe_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 385232, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'swe_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 511224, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'swe_Latn-ltz_Latn': {'num_samples': 1997, 'number_of_characters': 532584, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 140.59, 'max_sentence2_length': 543, 'unique_sentence2': 1996}, 'swe_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 543742, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'swe_Latn-nno_Latn': {'num_samples': 1997, 'number_of_characters': 500184, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.37, 'max_sentence2_length': 417, 'unique_sentence2': 1996}, 'swe_Latn-nob_Latn': {'num_samples': 1997, 'number_of_characters': 503271, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.91, 'max_sentence2_length': 482, 'unique_sentence2': 1996}, 'swe_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 529693, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'swe_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 529042, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'swe_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 526720, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'swe_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 540027, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'swe_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 524336, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'swe_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'swe_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'swe_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'swe_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'swe_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 126.1, 'max_sentence1_length': 430, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tah_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557343, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tah_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 610128, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'tah_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 603043, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'tah_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596595, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tah_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 613775, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'tah_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 629931, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'tah_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 583747, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'tah_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 586806, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'tah_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 587478, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'tah_Latn-ton_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 155.04, 'max_sentence1_length': 524, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 157.06, 'max_sentence2_length': 468, 'unique_sentence2': 1997}, 'tam_Taml-arb_Arab': {'num_samples': 1997, 'number_of_characters': 541142, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tam_Taml-ben_Beng': {'num_samples': 1997, 'number_of_characters': 553763, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tam_Taml-deu_Latn': {'num_samples': 1997, 'number_of_characters': 605611, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tam_Taml-div_Thaa': {'num_samples': 1997, 'number_of_characters': 613809, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'tam_Taml-ell_Grek': {'num_samples': 1997, 'number_of_characters': 609088, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tam_Taml-eng_Latn': {'num_samples': 1997, 'number_of_characters': 557681, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tam_Taml-eus_Latn': {'num_samples': 1997, 'number_of_characters': 585164, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tam_Taml-fas_Arab': {'num_samples': 1997, 'number_of_characters': 553300, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tam_Taml-fin_Latn': {'num_samples': 1997, 'number_of_characters': 579761, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tam_Taml-fra_Latn': {'num_samples': 1997, 'number_of_characters': 603069, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tam_Taml-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 554892, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tam_Taml-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 510257, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tam_Taml-hin_Deva': {'num_samples': 1997, 'number_of_characters': 571536, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tam_Taml-hun_Latn': {'num_samples': 1997, 'number_of_characters': 588337, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tam_Taml-ind_Latn': {'num_samples': 1997, 'number_of_characters': 596933, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tam_Taml-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 421587, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tam_Taml-kan_Knda': {'num_samples': 1997, 'number_of_characters': 575497, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tam_Taml-kor_Hang': {'num_samples': 1997, 'number_of_characters': 443366, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tam_Taml-lit_Latn': {'num_samples': 1997, 'number_of_characters': 569358, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tam_Taml-mar_Deva': {'num_samples': 1997, 'number_of_characters': 570848, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tam_Taml-nep_Deva': {'num_samples': 1997, 'number_of_characters': 558184, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-nld_Latn': {'num_samples': 1997, 'number_of_characters': 601876, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tam_Taml-pan_Guru': {'num_samples': 1997, 'number_of_characters': 560383, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tam_Taml-pol_Latn': {'num_samples': 1997, 'number_of_characters': 587827, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tam_Taml-por_Latn': {'num_samples': 1997, 'number_of_characters': 587176, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tam_Taml-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 584854, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tam_Taml-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 568702, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tam_Taml-snd_Arab': {'num_samples': 1997, 'number_of_characters': 530288, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tam_Taml-spa_Latn': {'num_samples': 1997, 'number_of_characters': 598161, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tam_Taml-swa_Latn': {'num_samples': 1997, 'number_of_characters': 582470, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tam_Taml-swe_Latn': {'num_samples': 1997, 'number_of_characters': 561788, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tam_Taml-tel_Telu': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tam_Taml-tur_Latn': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tam_Taml-urd_Arab': {'num_samples': 1997, 'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tam_Taml-vie_Latn': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tam_Taml-zho_Hant': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tam_Taml-zul_Latn': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 11, 'average_sentence1_length': 155.21, 'max_sentence1_length': 581, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tat_Cyrl-aze_Latn': {'num_samples': 1997, 'number_of_characters': 515560, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tat_Cyrl-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 492252, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tat_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 493646, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tat_Cyrl-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 506202, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tat_Cyrl-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 496790, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tat_Cyrl-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tat_Cyrl-tur_Latn': {'num_samples': 1997, 'number_of_characters': 511539, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tat_Cyrl-uig_Arab': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tat_Cyrl-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 123.15, 'max_sentence1_length': 539, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tel_Telu-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491329, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tel_Telu-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551375, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'tel_Telu-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495247, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tel_Telu-eus_Latn': {'num_samples': 1997, 'number_of_characters': 522730, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'tel_Telu-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492458, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'tel_Telu-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509102, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tel_Telu-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513063, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'tel_Telu-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508414, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'tel_Telu-nep_Deva': {'num_samples': 1997, 'number_of_characters': 495750, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'tel_Telu-pan_Guru': {'num_samples': 1997, 'number_of_characters': 497949, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'tel_Telu-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506268, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'tel_Telu-snd_Arab': {'num_samples': 1997, 'number_of_characters': 467854, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'tel_Telu-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557488, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tel_Telu-urd_Arab': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 123.95, 'max_sentence1_length': 412, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.19, 'max_sentence2_length': 390, 'unique_sentence2': 1996}, 'tgk_Cyrl-arb_Arab': {'num_samples': 1997, 'number_of_characters': 505328, 'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tgk_Cyrl-ckb_Arab': {'num_samples': 1997, 'number_of_characters': 526514, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 5, 'average_sentence2_length': 126.37, 'max_sentence2_length': 399, 'unique_sentence2': 1995}, 'tgk_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 521867, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tgk_Cyrl-fas_Arab': {'num_samples': 1997, 'number_of_characters': 517486, 'unique_pairs': 1995, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tgk_Cyrl-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 474443, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tgk_Cyrl-kmr_Latn': {'num_samples': 1997, 'number_of_characters': 520093, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 7, 'average_sentence2_length': 123.16, 'max_sentence2_length': 420, 'unique_sentence2': 1996}, 'tgk_Cyrl-mey_Arab': {'num_samples': 1997, 'number_of_characters': 487982, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 6, 'average_sentence2_length': 107.08, 'max_sentence2_length': 392, 'unique_sentence2': 1993}, 'tgk_Cyrl-prs_Arab': {'num_samples': 1997, 'number_of_characters': 516683, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.45, 'max_sentence2_length': 365, 'unique_sentence2': 1997}, 'tgk_Cyrl-pus_Arab': {'num_samples': 1997, 'number_of_characters': 516780, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 8, 'average_sentence2_length': 121.5, 'max_sentence2_length': 366, 'unique_sentence2': 1996}, 'tgk_Cyrl-shi_Arab': {'num_samples': 1997, 'number_of_characters': 489060, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 137.28, 'max_sentence1_length': 451, 'unique_sentence1': 1995, 'min_sentence2_length': 3, 'average_sentence2_length': 107.62, 'max_sentence2_length': 378, 'unique_sentence2': 1996}, 'tha_Thai-bod_Tibt': {'num_samples': 1997, 'number_of_characters': 538097, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 150.54, 'max_sentence2_length': 478, 'unique_sentence2': 1993}, 'tha_Thai-dzo_Tibt': {'num_samples': 1997, 'number_of_characters': 480689, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 121.79, 'max_sentence2_length': 411, 'unique_sentence2': 1992}, 'tha_Thai-eng_Latn': {'num_samples': 1997, 'number_of_characters': 485188, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tha_Thai-khm_Khmr': {'num_samples': 1997, 'number_of_characters': 525959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 144.46, 'max_sentence2_length': 517, 'unique_sentence2': 1996}, 'tha_Thai-lao_Laoo': {'num_samples': 1997, 'number_of_characters': 504448, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 133.69, 'max_sentence2_length': 507, 'unique_sentence2': 1997}, 'tha_Thai-mon_Mong': {'num_samples': 1997, 'number_of_characters': 496516, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 129.72, 'max_sentence2_length': 414, 'unique_sentence2': 1997}, 'tha_Thai-mya_Mymr': {'num_samples': 1997, 'number_of_characters': 549322, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 118.91, 'max_sentence1_length': 439, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 156.16, 'max_sentence2_length': 773, 'unique_sentence2': 1997}, 'tir_Ethi-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 332745, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tir_Ethi-eng_Latn': {'num_samples': 1997, 'number_of_characters': 412958, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tir_Ethi-hau_Latn': {'num_samples': 1997, 'number_of_characters': 435204, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tir_Ethi-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 411339, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'tir_Ethi-nso_Latn': {'num_samples': 1997, 'number_of_characters': 456737, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tir_Ethi-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 402669, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tir_Ethi-som_Latn': {'num_samples': 1997, 'number_of_characters': 456530, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tir_Ethi-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 453380, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'tir_Ethi-swa_Latn': {'num_samples': 1997, 'number_of_characters': 437747, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tir_Ethi-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'tir_Ethi-wol_Latn': {'num_samples': 1997, 'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tir_Ethi-xho_Latn': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tir_Ethi-yor_Latn': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tir_Ethi-zul_Latn': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 5, 'average_sentence1_length': 82.74, 'max_sentence1_length': 272, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'ton_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 561360, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ton_Latn-fij_Latn': {'num_samples': 1997, 'number_of_characters': 614145, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 150.48, 'max_sentence2_length': 448, 'unique_sentence2': 1988}, 'ton_Latn-fil_Latn': {'num_samples': 1997, 'number_of_characters': 607060, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 146.93, 'max_sentence2_length': 554, 'unique_sentence2': 1997}, 'ton_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 600612, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'ton_Latn-mal_Mlym': {'num_samples': 1997, 'number_of_characters': 617792, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 152.3, 'max_sentence2_length': 540, 'unique_sentence2': 1996}, 'ton_Latn-mlg_Latn': {'num_samples': 1997, 'number_of_characters': 633948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 160.39, 'max_sentence2_length': 559, 'unique_sentence2': 1994}, 'ton_Latn-mri_Latn': {'num_samples': 1997, 'number_of_characters': 587764, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 137.27, 'max_sentence2_length': 443, 'unique_sentence2': 1997}, 'ton_Latn-msa_Latn': {'num_samples': 1997, 'number_of_characters': 590823, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 138.8, 'max_sentence2_length': 463, 'unique_sentence2': 1997}, 'ton_Latn-smo_Latn': {'num_samples': 1997, 'number_of_characters': 591495, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 139.14, 'max_sentence2_length': 431, 'unique_sentence2': 1996}, 'ton_Latn-tah_Latn': {'num_samples': 1997, 'number_of_characters': 623263, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 157.06, 'max_sentence1_length': 468, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 155.04, 'max_sentence2_length': 524, 'unique_sentence2': 1997}, 'tsn_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 501790, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'tsn_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 582003, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tsn_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 604249, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'tsn_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 580384, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'tsn_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 625782, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'tsn_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 571714, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'tsn_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 625575, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'tsn_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 622425, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'tsn_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 606792, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tsn_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 499521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'tsn_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'tsn_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'tsn_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'tsn_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 167.39, 'max_sentence1_length': 556, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'tuk_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 554908, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tuk_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 531600, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tuk_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 532994, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tuk_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 545550, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tuk_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 536138, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tuk_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 531200, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tuk_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'tuk_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tuk_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 1996, 'min_sentence1_length': 9, 'average_sentence1_length': 142.85, 'max_sentence1_length': 576, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 496794, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'tur_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 535247, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'tur_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 511939, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'tur_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 509415, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'tur_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 561263, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'tur_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 564740, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'tur_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 513333, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'tur_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 508952, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'tur_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 535413, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'tur_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 558721, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'tur_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 465909, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'tur_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 527188, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'tur_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 543989, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'tur_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 552585, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'tur_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 377239, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'tur_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 525889, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'tur_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 516477, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'tur_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 399018, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'tur_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 525010, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'tur_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 557528, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 543479, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'tur_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 542828, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'tur_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 540506, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'tur_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 553813, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'tur_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 538122, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'tur_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 517440, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'tur_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 575574, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'tur_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 511539, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'tur_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 550887, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'tur_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'tur_Latn-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'tur_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'tur_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'tur_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 133.01, 'max_sentence1_length': 504, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'uig_Arab-aze_Latn': {'num_samples': 1997, 'number_of_characters': 580656, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uig_Arab-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 557348, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uig_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 558742, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uig_Arab-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 571298, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uig_Arab-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 561886, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uig_Arab-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 556948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uig_Arab-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 596296, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uig_Arab-tur_Latn': {'num_samples': 1997, 'number_of_characters': 576635, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uig_Arab-uzb_Latn': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 155.74, 'max_sentence1_length': 592, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 147.07, 'max_sentence2_length': 470, 'unique_sentence2': 1996}, 'ukr_Cyrl-bel_Cyrl': {'num_samples': 1997, 'number_of_characters': 518873, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.24, 'max_sentence2_length': 422, 'unique_sentence2': 1996}, 'ukr_Cyrl-bos_Latn': {'num_samples': 1997, 'number_of_characters': 517693, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 127.65, 'max_sentence2_length': 434, 'unique_sentence2': 1996}, 'ukr_Cyrl-bul_Cyrl': {'num_samples': 1997, 'number_of_characters': 532672, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 135.15, 'max_sentence2_length': 493, 'unique_sentence2': 1996}, 'ukr_Cyrl-ces_Latn': {'num_samples': 1997, 'number_of_characters': 504101, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 120.84, 'max_sentence2_length': 474, 'unique_sentence2': 1997}, 'ukr_Cyrl-eng_Latn': {'num_samples': 1997, 'number_of_characters': 510503, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ukr_Cyrl-hrv_Latn': {'num_samples': 1997, 'number_of_characters': 518708, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 128.15, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'ukr_Cyrl-mkd_Cyrl': {'num_samples': 1997, 'number_of_characters': 530674, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.15, 'max_sentence2_length': 451, 'unique_sentence2': 1997}, 'ukr_Cyrl-pol_Latn': {'num_samples': 1997, 'number_of_characters': 540649, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'ukr_Cyrl-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 537676, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'ukr_Cyrl-slk_Latn': {'num_samples': 1997, 'number_of_characters': 515752, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 126.67, 'max_sentence2_length': 403, 'unique_sentence2': 1996}, 'ukr_Cyrl-slv_Latn': {'num_samples': 1997, 'number_of_characters': 515679, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.64, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'ukr_Cyrl-srp_Cyrl': {'num_samples': 1997, 'number_of_characters': 515086, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 126.34, 'max_sentence2_length': 439, 'unique_sentence2': 1995}, 'ukr_Cyrl-srp_Latn': {'num_samples': 1997, 'number_of_characters': 518924, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 131.59, 'max_sentence1_length': 440, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 128.26, 'max_sentence2_length': 452, 'unique_sentence2': 1996}, 'urd_Arab-ben_Beng': {'num_samples': 1997, 'number_of_characters': 491800, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'urd_Arab-div_Thaa': {'num_samples': 1997, 'number_of_characters': 551846, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 152.15, 'max_sentence2_length': 609, 'unique_sentence2': 1996}, 'urd_Arab-eng_Latn': {'num_samples': 1997, 'number_of_characters': 495718, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'urd_Arab-eus_Latn': {'num_samples': 1997, 'number_of_characters': 523201, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 137.81, 'max_sentence2_length': 393, 'unique_sentence2': 1997}, 'urd_Arab-guj_Gujr': {'num_samples': 1997, 'number_of_characters': 492929, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 122.65, 'max_sentence2_length': 378, 'unique_sentence2': 1997}, 'urd_Arab-hin_Deva': {'num_samples': 1997, 'number_of_characters': 509573, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'urd_Arab-kan_Knda': {'num_samples': 1997, 'number_of_characters': 513534, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 132.97, 'max_sentence2_length': 449, 'unique_sentence2': 1996}, 'urd_Arab-mar_Deva': {'num_samples': 1997, 'number_of_characters': 508885, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 130.64, 'max_sentence2_length': 443, 'unique_sentence2': 1995}, 'urd_Arab-nep_Deva': {'num_samples': 1997, 'number_of_characters': 496221, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 124.3, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'urd_Arab-pan_Guru': {'num_samples': 1997, 'number_of_characters': 498420, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 125.4, 'max_sentence2_length': 383, 'unique_sentence2': 1996}, 'urd_Arab-sin_Sinh': {'num_samples': 1997, 'number_of_characters': 506739, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 129.56, 'max_sentence2_length': 441, 'unique_sentence2': 1996}, 'urd_Arab-snd_Arab': {'num_samples': 1997, 'number_of_characters': 468325, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 110.33, 'max_sentence2_length': 335, 'unique_sentence2': 1996}, 'urd_Arab-tam_Taml': {'num_samples': 1997, 'number_of_characters': 557959, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'urd_Arab-tel_Telu': {'num_samples': 1997, 'number_of_characters': 495525, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 124.19, 'max_sentence1_length': 390, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 123.95, 'max_sentence2_length': 412, 'unique_sentence2': 1996}, 'uzb_Latn-aze_Latn': {'num_samples': 1997, 'number_of_characters': 563329, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 135.02, 'max_sentence2_length': 398, 'unique_sentence2': 1997}, 'uzb_Latn-bak_Cyrl': {'num_samples': 1997, 'number_of_characters': 540021, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 123.35, 'max_sentence2_length': 437, 'unique_sentence2': 1995}, 'uzb_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 541415, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'uzb_Latn-kaz_Cyrl': {'num_samples': 1997, 'number_of_characters': 553971, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 130.33, 'max_sentence2_length': 473, 'unique_sentence2': 1996}, 'uzb_Latn-kir_Cyrl': {'num_samples': 1997, 'number_of_characters': 544559, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 125.62, 'max_sentence2_length': 395, 'unique_sentence2': 1996}, 'uzb_Latn-tat_Cyrl': {'num_samples': 1997, 'number_of_characters': 539621, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 123.15, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'uzb_Latn-tuk_Latn': {'num_samples': 1997, 'number_of_characters': 578969, 'unique_pairs': 1996, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 142.85, 'max_sentence2_length': 576, 'unique_sentence2': 1996}, 'uzb_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 559308, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'uzb_Latn-uig_Arab': {'num_samples': 1997, 'number_of_characters': 604717, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 147.07, 'max_sentence1_length': 470, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 155.74, 'max_sentence2_length': 592, 'unique_sentence2': 1996}, 'ven_Latn-bem_Latn': {'num_samples': 1997, 'number_of_characters': 598248, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 8, 'average_sentence2_length': 149.47, 'max_sentence2_length': 465, 'unique_sentence2': 1997}, 'ven_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 547476, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'ven_Latn-ewe_Latn': {'num_samples': 1997, 'number_of_characters': 538734, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 119.67, 'max_sentence2_length': 493, 'unique_sentence2': 1994}, 'ven_Latn-fuc_Latn': {'num_samples': 1997, 'number_of_characters': 528236, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 114.41, 'max_sentence2_length': 376, 'unique_sentence2': 1996}, 'ven_Latn-kin_Latn': {'num_samples': 1997, 'number_of_characters': 603543, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 152.12, 'max_sentence2_length': 541, 'unique_sentence2': 1996}, 'ven_Latn-nde_Latn': {'num_samples': 1997, 'number_of_characters': 597495, 'unique_pairs': 1997, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.09, 'max_sentence2_length': 590, 'unique_sentence2': 1997}, 'ven_Latn-nya_Latn': {'num_samples': 1997, 'number_of_characters': 584038, 'unique_pairs': 1996, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 10, 'average_sentence2_length': 142.35, 'max_sentence2_length': 464, 'unique_sentence2': 1993}, 'ven_Latn-sna_Latn': {'num_samples': 1997, 'number_of_characters': 598086, 'unique_pairs': 1995, 'min_sentence1_length': 10, 'average_sentence1_length': 150.1, 'max_sentence1_length': 535, 'unique_sentence1': 1993, 'min_sentence2_length': 6, 'average_sentence2_length': 149.39, 'max_sentence2_length': 511, 'unique_sentence2': 1995}, 'vie_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 502302, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'vie_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 514923, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'vie_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 566771, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'vie_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 570248, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'vie_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 518841, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'vie_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 514460, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'vie_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 540921, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'vie_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 564229, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'vie_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 471417, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'vie_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 532696, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'vie_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 549497, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'vie_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 558093, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'vie_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 382747, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'vie_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 404526, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'vie_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 530518, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'vie_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 563036, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'vie_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 548987, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'vie_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 548336, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'vie_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 546014, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'vie_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 559321, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'vie_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 543630, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'vie_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 522948, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'vie_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 581082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'vie_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 536734, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'vie_Latn-yue_Hant': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'vie_Latn-zho_Hans': {'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'vie_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'vie_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 135.76, 'max_sentence1_length': 437, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'wol_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 407310, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'wol_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 487523, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'wol_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 509769, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'wol_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 485904, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'wol_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 531302, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'wol_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 477234, 'unique_pairs': 1992, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'wol_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 531095, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'wol_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 527945, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'wol_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 512312, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'wol_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 405041, 'unique_pairs': 1996, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'wol_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 574086, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'wol_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'wol_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'wol_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 120.08, 'max_sentence1_length': 405, 'unique_sentence1': 1990, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'xho_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 435597, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'xho_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 515810, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'xho_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 538056, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'xho_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 514191, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'xho_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 559589, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'xho_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 505521, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'xho_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 559382, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'xho_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 556232, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'xho_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 540599, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'xho_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 433328, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'xho_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 602373, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'xho_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 507893, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'xho_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'xho_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 6, 'average_sentence1_length': 134.25, 'max_sentence1_length': 492, 'unique_sentence1': 1997, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yor_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 483595, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'yor_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 563808, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yor_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 586054, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'yor_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 562189, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'yor_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 607587, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'yor_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 553519, 'unique_pairs': 1996, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'yor_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 607380, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'yor_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 604230, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'yor_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 588597, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'yor_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 481326, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'yor_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 650371, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'yor_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 555891, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'yor_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 584178, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'yor_Latn-zul_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 7, 'average_sentence1_length': 158.28, 'max_sentence1_length': 582, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'yue_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 326607, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'yue_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 190513, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'yue_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 212292, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'yue_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 350008, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'yue_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'yue_Hant-zho_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 4, 'average_sentence1_length': 39.5, 'max_sentence1_length': 133, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hans-eng_Latn': {'num_samples': 1997, 'number_of_characters': 332681, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hans-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 196587, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hans-kor_Hang': {'num_samples': 1997, 'number_of_characters': 218366, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hans-vie_Latn': {'num_samples': 1997, 'number_of_characters': 356082, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hans-yue_Hant': {'num_samples': 1997, 'number_of_characters': 163848, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hans-zho_Hant': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 4, 'average_sentence1_length': 42.54, 'max_sentence1_length': 263, 'unique_sentence1': 1997, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}, 'zho_Hant-arb_Arab': {'num_samples': 1997, 'number_of_characters': 322659, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zho_Hant-ben_Beng': {'num_samples': 1997, 'number_of_characters': 335280, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zho_Hant-deu_Latn': {'num_samples': 1997, 'number_of_characters': 387128, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zho_Hant-ell_Grek': {'num_samples': 1997, 'number_of_characters': 390605, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zho_Hant-eng_Latn': {'num_samples': 1997, 'number_of_characters': 339198, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zho_Hant-fas_Arab': {'num_samples': 1997, 'number_of_characters': 334817, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zho_Hant-fin_Latn': {'num_samples': 1997, 'number_of_characters': 361278, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zho_Hant-fra_Latn': {'num_samples': 1997, 'number_of_characters': 384586, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zho_Hant-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 291774, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zho_Hant-hin_Deva': {'num_samples': 1997, 'number_of_characters': 353053, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zho_Hant-hun_Latn': {'num_samples': 1997, 'number_of_characters': 369854, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zho_Hant-ind_Latn': {'num_samples': 1997, 'number_of_characters': 378450, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zho_Hant-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 203104, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zho_Hant-kor_Hang': {'num_samples': 1997, 'number_of_characters': 224883, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zho_Hant-lit_Latn': {'num_samples': 1997, 'number_of_characters': 350875, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zho_Hant-nld_Latn': {'num_samples': 1997, 'number_of_characters': 383393, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zho_Hant-pol_Latn': {'num_samples': 1997, 'number_of_characters': 369344, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zho_Hant-por_Latn': {'num_samples': 1997, 'number_of_characters': 368693, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zho_Hant-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 366371, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zho_Hant-spa_Latn': {'num_samples': 1997, 'number_of_characters': 379678, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zho_Hant-swa_Latn': {'num_samples': 1997, 'number_of_characters': 363987, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zho_Hant-swe_Latn': {'num_samples': 1997, 'number_of_characters': 343305, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zho_Hant-tam_Taml': {'num_samples': 1997, 'number_of_characters': 401439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zho_Hant-tur_Latn': {'num_samples': 1997, 'number_of_characters': 357091, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zho_Hant-vie_Latn': {'num_samples': 1997, 'number_of_characters': 362599, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zho_Hant-yue_Hant': {'num_samples': 1997, 'number_of_characters': 170365, 'unique_pairs': 1996, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 39.5, 'max_sentence2_length': 133, 'unique_sentence2': 1996}, 'zho_Hant-zho_Hans': {'num_samples': 1997, 'number_of_characters': 176439, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 42.54, 'max_sentence2_length': 263, 'unique_sentence2': 1997}, 'zho_Hant-zul_Latn': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 3, 'average_sentence1_length': 45.81, 'max_sentence1_length': 200, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 129.06, 'max_sentence2_length': 494, 'unique_sentence2': 1996}, 'zul_Latn-amh_Ethi': {'num_samples': 1997, 'number_of_characters': 425239, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 83.88, 'max_sentence2_length': 290, 'unique_sentence2': 1994}, 'zul_Latn-arb_Arab': {'num_samples': 1997, 'number_of_characters': 488913, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 115.76, 'max_sentence2_length': 362, 'unique_sentence2': 1995}, 'zul_Latn-ben_Beng': {'num_samples': 1997, 'number_of_characters': 501534, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 122.08, 'max_sentence2_length': 402, 'unique_sentence2': 1997}, 'zul_Latn-deu_Latn': {'num_samples': 1997, 'number_of_characters': 553382, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 148.05, 'max_sentence2_length': 508, 'unique_sentence2': 1996}, 'zul_Latn-ell_Grek': {'num_samples': 1997, 'number_of_characters': 556859, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 149.79, 'max_sentence2_length': 584, 'unique_sentence2': 1996}, 'zul_Latn-eng_Latn': {'num_samples': 1997, 'number_of_characters': 505452, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 124.05, 'max_sentence2_length': 437, 'unique_sentence2': 1997}, 'zul_Latn-fas_Arab': {'num_samples': 1997, 'number_of_characters': 501071, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 121.85, 'max_sentence2_length': 389, 'unique_sentence2': 1995}, 'zul_Latn-fin_Latn': {'num_samples': 1997, 'number_of_characters': 527532, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.1, 'max_sentence2_length': 463, 'unique_sentence2': 1996}, 'zul_Latn-fra_Latn': {'num_samples': 1997, 'number_of_characters': 550840, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.77, 'max_sentence2_length': 512, 'unique_sentence2': 1996}, 'zul_Latn-hau_Latn': {'num_samples': 1997, 'number_of_characters': 527698, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 135.19, 'max_sentence2_length': 483, 'unique_sentence2': 1997}, 'zul_Latn-heb_Hebr': {'num_samples': 1997, 'number_of_characters': 458028, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 100.3, 'max_sentence2_length': 375, 'unique_sentence2': 1996}, 'zul_Latn-hin_Deva': {'num_samples': 1997, 'number_of_characters': 519307, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 130.98, 'max_sentence2_length': 394, 'unique_sentence2': 1996}, 'zul_Latn-hun_Latn': {'num_samples': 1997, 'number_of_characters': 536108, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 139.4, 'max_sentence2_length': 508, 'unique_sentence2': 1997}, 'zul_Latn-ibo_Latn': {'num_samples': 1997, 'number_of_characters': 503833, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 123.24, 'max_sentence2_length': 469, 'unique_sentence2': 1997}, 'zul_Latn-ind_Latn': {'num_samples': 1997, 'number_of_characters': 544704, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 143.7, 'max_sentence2_length': 486, 'unique_sentence2': 1997}, 'zul_Latn-jpn_Jpan': {'num_samples': 1997, 'number_of_characters': 369358, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 4, 'average_sentence2_length': 55.9, 'max_sentence2_length': 189, 'unique_sentence2': 1994}, 'zul_Latn-kor_Hang': {'num_samples': 1997, 'number_of_characters': 391137, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 66.8, 'max_sentence2_length': 217, 'unique_sentence2': 1995}, 'zul_Latn-lit_Latn': {'num_samples': 1997, 'number_of_characters': 517129, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 129.89, 'max_sentence2_length': 446, 'unique_sentence2': 1995}, 'zul_Latn-nld_Latn': {'num_samples': 1997, 'number_of_characters': 549647, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 146.18, 'max_sentence2_length': 539, 'unique_sentence2': 1996}, 'zul_Latn-nso_Latn': {'num_samples': 1997, 'number_of_characters': 549231, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 145.97, 'max_sentence2_length': 487, 'unique_sentence2': 1996}, 'zul_Latn-orm_Ethi': {'num_samples': 1997, 'number_of_characters': 495163, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 118.89, 'max_sentence2_length': 466, 'unique_sentence2': 1984}, 'zul_Latn-pol_Latn': {'num_samples': 1997, 'number_of_characters': 535598, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 9, 'average_sentence2_length': 139.14, 'max_sentence2_length': 468, 'unique_sentence2': 1996}, 'zul_Latn-por_Latn': {'num_samples': 1997, 'number_of_characters': 534947, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 138.82, 'max_sentence2_length': 497, 'unique_sentence2': 1996}, 'zul_Latn-rus_Cyrl': {'num_samples': 1997, 'number_of_characters': 532625, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 137.65, 'max_sentence2_length': 419, 'unique_sentence2': 1996}, 'zul_Latn-som_Latn': {'num_samples': 1997, 'number_of_characters': 549024, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 145.86, 'max_sentence2_length': 455, 'unique_sentence2': 1997}, 'zul_Latn-spa_Latn': {'num_samples': 1997, 'number_of_characters': 545932, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 1, 'average_sentence2_length': 144.32, 'max_sentence2_length': 504, 'unique_sentence2': 1996}, 'zul_Latn-ssw_Latn': {'num_samples': 1997, 'number_of_characters': 545874, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 144.29, 'max_sentence2_length': 510, 'unique_sentence2': 1996}, 'zul_Latn-swa_Latn': {'num_samples': 1997, 'number_of_characters': 530241, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 10, 'average_sentence2_length': 136.46, 'max_sentence2_length': 430, 'unique_sentence2': 1997}, 'zul_Latn-swe_Latn': {'num_samples': 1997, 'number_of_characters': 509559, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 8, 'average_sentence2_length': 126.1, 'max_sentence2_length': 430, 'unique_sentence2': 1996}, 'zul_Latn-tam_Taml': {'num_samples': 1997, 'number_of_characters': 567693, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 11, 'average_sentence2_length': 155.21, 'max_sentence2_length': 581, 'unique_sentence2': 1997}, 'zul_Latn-tir_Ethi': {'num_samples': 1997, 'number_of_characters': 422970, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 5, 'average_sentence2_length': 82.74, 'max_sentence2_length': 272, 'unique_sentence2': 1996}, 'zul_Latn-tsn_Latn': {'num_samples': 1997, 'number_of_characters': 592015, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 167.39, 'max_sentence2_length': 556, 'unique_sentence2': 1997}, 'zul_Latn-tur_Latn': {'num_samples': 1997, 'number_of_characters': 523345, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 133.01, 'max_sentence2_length': 504, 'unique_sentence2': 1997}, 'zul_Latn-vie_Latn': {'num_samples': 1997, 'number_of_characters': 528853, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 135.76, 'max_sentence2_length': 437, 'unique_sentence2': 1996}, 'zul_Latn-wol_Latn': {'num_samples': 1997, 'number_of_characters': 497535, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 120.08, 'max_sentence2_length': 405, 'unique_sentence2': 1990}, 'zul_Latn-xho_Latn': {'num_samples': 1997, 'number_of_characters': 525822, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 6, 'average_sentence2_length': 134.25, 'max_sentence2_length': 492, 'unique_sentence2': 1997}, 'zul_Latn-yor_Latn': {'num_samples': 1997, 'number_of_characters': 573820, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 7, 'average_sentence2_length': 158.28, 'max_sentence2_length': 582, 'unique_sentence2': 1996}, 'zul_Latn-zho_Hant': {'num_samples': 1997, 'number_of_characters': 349210, 'unique_pairs': 1997, 'min_sentence1_length': 8, 'average_sentence1_length': 129.06, 'max_sentence1_length': 494, 'unique_sentence1': 1996, 'min_sentence2_length': 3, 'average_sentence2_length': 45.81, 'max_sentence2_length': 200, 'unique_sentence2': 1996}}}}
NYSJudicialEthicsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
NaijaSenti ['hau', 'ibo', 'pcm', 'yor'] Classification s2s [Social, Written] None None
NamaaMrTydiReranking (Muennighoff et al., 2022) ['ara'] Reranking s2s [Encyclopaedic, Written] None None
NanoArguAnaRetrieval (Boteva et al., 2016) ['eng'] Retrieval s2p [Medical, Written] None None
NanoClimateFeverRetrieval (Thomas Diggelmann, 2021) ['eng'] Retrieval s2p [Academic, News, Non-fiction] None None
NanoDBPediaRetrieval (Lehmann et al., 2015) ['eng'] Retrieval s2p [Encyclopaedic] None None
NanoFEVERRetrieval ['eng'] Retrieval s2p [Academic, Encyclopaedic] None None
NanoFiQA2018Retrieval (Nandan Thakur, 2021) ['eng'] Retrieval s2p [Academic, Social] None None
NanoHotpotQARetrieval ['eng'] Retrieval s2p [Web, Written] None None
NanoMSMARCORetrieval (Tri Nguyen and Mir Rosenberg and Xia Song and Jianfeng Gao and Saurabh Tiwary and Rangan Majumder and Li Deng, 2016) ['eng'] Retrieval s2p [Web] None None
NanoNFCorpusRetrieval (Boteva et al., 2016) ['eng'] Retrieval s2p [Academic, Medical, Written] None None
NanoNQRetrieval (Tom Kwiatkowski, 2019) ['eng'] Retrieval s2p [Academic, Web] None None
NanoQuoraRetrieval (DataCanary et al., 2017) ['eng'] Retrieval s2s [Social] None None
NanoSCIDOCSRetrieval (Arman Cohan, 2020) ['eng'] Retrieval s2p [Academic, Non-fiction, Written] None None
NanoSciFactRetrieval (Arman Cohan, 2020) ['eng'] Retrieval s2p [Academic, Medical, Written] None None
NanoTouche2020Retrieval ['eng'] Retrieval s2p [Academic] None None
NarrativeQARetrieval (Tomáš Kočiský, 2017) ['eng'] Retrieval s2p None None
NepaliNewsClassification ['nep'] Classification s2s [News, Written] None None
NeuCLIR2022Retrieval (Lawrie et al., 2023) ['fas', 'rus', 'zho'] Retrieval s2p [News, Written] None None
NeuCLIR2022RetrievalHardNegatives (Lawrie et al., 2023) ['fas', 'rus', 'zho'] Retrieval s2p [News, Written] None None
NeuCLIR2023Retrieval (Dawn Lawrie, 2024) ['fas', 'rus', 'zho'] Retrieval s2p [News, Written] None None
NeuCLIR2023RetrievalHardNegatives (Dawn Lawrie, 2024) ['fas', 'rus', 'zho'] Retrieval s2p [News, Written] None None
News21InstructionRetrieval (Orion Weller, 2024) ['eng'] InstructionRetrieval s2p [News, Written] None None
NewsClassification (Zhang et al., 2015) ['eng'] Classification s2s [News, Written] None None
NoRecClassification ['nob'] Classification s2s [Reviews, Written] None None
NollySentiBitextMining (Shode et al., 2023) ['eng', 'hau', 'ibo', 'pcm', 'yor'] BitextMining s2s [Reviews, Social, Written] {'train': 1640} {'train': {'num_samples': 1640, 'number_of_characters': 445805, 'unique_pairs': 1632, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 3, 'average_sentence2_length': 135.52, 'max_sentence2_length': 1728, 'unique_sentence2': 1631, 'hf_subset_descriptive_stats': {'en-ha': {'num_samples': 410, 'number_of_characters': 115348, 'unique_pairs': 407, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 4, 'average_sentence2_length': 145.02, 'max_sentence2_length': 1728, 'unique_sentence2': 407}, 'en-ig': {'num_samples': 410, 'number_of_characters': 107173, 'unique_pairs': 409, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 5, 'average_sentence2_length': 125.08, 'max_sentence2_length': 1137, 'unique_sentence2': 408}, 'en-pcm': {'num_samples': 410, 'number_of_characters': 109955, 'unique_pairs': 408, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 3, 'average_sentence2_length': 131.87, 'max_sentence2_length': 1552, 'unique_sentence2': 408}, 'en-yo': {'num_samples': 410, 'number_of_characters': 113329, 'unique_pairs': 409, 'min_sentence1_length': 3, 'average_sentence1_length': 136.32, 'max_sentence1_length': 1698, 'unique_sentence1': 405, 'min_sentence2_length': 6, 'average_sentence2_length': 140.1, 'max_sentence2_length': 1338, 'unique_sentence2': 409}}}}
NorQuadRetrieval ['nob'] Retrieval p2p [Encyclopaedic, Non-fiction, Written] None None
NordicLangClassification ['dan', 'fao', 'isl', 'nno', 'nob', 'swe'] Classification s2s [Encyclopaedic] None None
NorwegianCourtsBitextMining (Tiedemann et al., 2020) ['nno', 'nob'] BitextMining s2s [Legal, Written] {'test': 228} {'test': {'num_samples': 228, 'number_of_characters': 37441, 'unique_pairs': 228, 'min_sentence1_length': 13, 'average_sentence1_length': 82.2, 'max_sentence1_length': 272, 'unique_sentence1': 227, 'min_sentence2_length': 10, 'average_sentence2_length': 82.02, 'max_sentence2_length': 269, 'unique_sentence2': 226}}
NorwegianParliamentClassification ['nob'] Classification s2s [Government, Spoken] None None
NusaParagraphEmotionClassification ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] Classification s2s [Fiction, Non-fiction, Written] None None
NusaParagraphTopicClassification ['bbc', 'bew', 'bug', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] Classification s2s [Fiction, Non-fiction, Written] None None
NusaTranslationBitextMining (Cahyawijaya et al., 2023) ['abs', 'bbc', 'bew', 'bhp', 'ind', 'jav', 'mad', 'mak', 'min', 'mui', 'rej', 'sun'] BitextMining s2s [Social, Written] {'train': 50200} {'train': {'num_samples': 50200, 'number_of_characters': 14759870, 'unique_pairs': 50140, 'min_sentence1_length': 5, 'average_sentence1_length': 145.46, 'max_sentence1_length': 873, 'unique_sentence1': 8258, 'min_sentence2_length': 5, 'average_sentence2_length': 148.57, 'max_sentence2_length': 980, 'unique_sentence2': 50102, 'hf_subset_descriptive_stats': {'ind-abs': {'num_samples': 1000, 'number_of_characters': 295680, 'unique_pairs': 999, 'min_sentence1_length': 5, 'average_sentence1_length': 148.37, 'max_sentence1_length': 727, 'unique_sentence1': 998, 'min_sentence2_length': 6, 'average_sentence2_length': 147.31, 'max_sentence2_length': 629, 'unique_sentence2': 998}, 'ind-btk': {'num_samples': 6600, 'number_of_characters': 1927907, 'unique_pairs': 6597, 'min_sentence1_length': 5, 'average_sentence1_length': 145.37, 'max_sentence1_length': 873, 'unique_sentence1': 6521, 'min_sentence2_length': 5, 'average_sentence2_length': 146.74, 'max_sentence2_length': 980, 'unique_sentence2': 6596}, 'ind-bew': {'num_samples': 6600, 'number_of_characters': 1939300, 'unique_pairs': 6595, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 148.41, 'max_sentence2_length': 840, 'unique_sentence2': 6590}, 'ind-bhp': {'num_samples': 1000, 'number_of_characters': 261666, 'unique_pairs': 1000, 'min_sentence1_length': 11, 'average_sentence1_length': 133.53, 'max_sentence1_length': 468, 'unique_sentence1': 999, 'min_sentence2_length': 10, 'average_sentence2_length': 128.14, 'max_sentence2_length': 459, 'unique_sentence2': 999}, 'ind-jav': {'num_samples': 6600, 'number_of_characters': 1922162, 'unique_pairs': 6594, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 5, 'average_sentence2_length': 145.81, 'max_sentence2_length': 854, 'unique_sentence2': 6585}, 'ind-mad': {'num_samples': 6600, 'number_of_characters': 1973257, 'unique_pairs': 6598, 'min_sentence1_length': 5, 'average_sentence1_length': 145.36, 'max_sentence1_length': 873, 'unique_sentence1': 6521, 'min_sentence2_length': 5, 'average_sentence2_length': 153.62, 'max_sentence2_length': 827, 'unique_sentence2': 6592}, 'ind-mak': {'num_samples': 6600, 'number_of_characters': 1953868, 'unique_pairs': 6594, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 150.61, 'max_sentence2_length': 888, 'unique_sentence2': 6586}, 'ind-min': {'num_samples': 6600, 'number_of_characters': 1937033, 'unique_pairs': 6595, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 6, 'average_sentence2_length': 148.06, 'max_sentence2_length': 837, 'unique_sentence2': 6591}, 'ind-mui': {'num_samples': 1000, 'number_of_characters': 301448, 'unique_pairs': 1000, 'min_sentence1_length': 11, 'average_sentence1_length': 150.45, 'max_sentence1_length': 451, 'unique_sentence1': 997, 'min_sentence2_length': 11, 'average_sentence2_length': 150.99, 'max_sentence2_length': 450, 'unique_sentence2': 1000}, 'ind-rej': {'num_samples': 1000, 'number_of_characters': 291205, 'unique_pairs': 1000, 'min_sentence1_length': 9, 'average_sentence1_length': 151.62, 'max_sentence1_length': 873, 'unique_sentence1': 998, 'min_sentence2_length': 8, 'average_sentence2_length': 139.58, 'max_sentence2_length': 784, 'unique_sentence2': 1000}, 'ind-sun': {'num_samples': 6600, 'number_of_characters': 1956344, 'unique_pairs': 6591, 'min_sentence1_length': 5, 'average_sentence1_length': 145.43, 'max_sentence1_length': 873, 'unique_sentence1': 6512, 'min_sentence2_length': 5, 'average_sentence2_length': 150.99, 'max_sentence2_length': 881, 'unique_sentence2': 6588}}}}
NusaX-senti (Winata et al., 2022) ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] Classification s2s [Constructed, Reviews, Social, Web, Written] None None
NusaXBitextMining (Winata et al., 2023) ['ace', 'ban', 'bbc', 'bjn', 'bug', 'eng', 'ind', 'jav', 'mad', 'min', 'nij', 'sun'] BitextMining s2s [Reviews, Written] None None
OKVQAIT2TRetrieval (Marino et al., 2019) ['eng'] Any2AnyRetrieval it2t [Encyclopaedic] {'test': 119562} {'test': {'number_of_characters': 72551583, 'num_samples': 119562, 'num_queries': 5046, 'num_documents': 114516, 'min_document_length': 234, 'average_document_length': 631.71, 'max_document_length': 2112, 'unique_documents': 114516, 'num_document_images': 0, 'min_query_length': 11, 'average_query_length': 41.71, 'max_query_length': 155, 'unique_queries': 4624, 'num_query_images': 5046, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 7.43, 'max_relevant_docs_per_query': 9, 'unique_relevant_docs': 19941}}
OPP115DataRetentionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115DataSecurityLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115DoNotTrackLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115FirstPartyCollectionUseLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115InternationalAndSpecificAudiencesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115PolicyChangeLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115ThirdPartySharingCollectionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115UserAccessEditAndDeletionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OPP115UserChoiceControlLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OVENIT2ITRetrieval (Hu et al., 2023) ['eng'] Any2AnyRetrieval it2it [Encyclopaedic] {'test': 349876} {'test': {'number_of_characters': 199201924, 'num_samples': 349876, 'num_queries': 14741, 'num_documents': 335135, 'min_document_length': 11, 'average_document_length': 593.1, 'max_document_length': 2531, 'unique_documents': 335135, 'num_document_images': 335135, 'min_query_length': 11, 'average_query_length': 29.36, 'max_query_length': 100, 'unique_queries': 520, 'num_query_images': 14741, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 17.72, 'max_relevant_docs_per_query': 128, 'unique_relevant_docs': 14284}}
OVENIT2TRetrieval (Hu et al., 2023) ['eng'] Any2AnyRetrieval it2i [Encyclopaedic] {'test': 726671} {'test': {'number_of_characters': 356548734, 'num_samples': 726671, 'num_queries': 50004, 'num_documents': 676667, 'min_document_length': 2, 'average_document_length': 524.5, 'max_document_length': 60546, 'unique_documents': 676667, 'num_document_images': 0, 'min_query_length': 11, 'average_query_length': 32.77, 'max_query_length': 78, 'unique_queries': 1540, 'num_query_images': 50004, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 9.85, 'max_relevant_docs_per_query': 174, 'unique_relevant_docs': 24471}}
Ocnli (Hai Hu, 2020) ['cmn'] PairClassification s2s None None
OdiaNewsClassification (Anoop Kunchukuttan, 2020) ['ory'] Classification s2s [News, Written] None None
OnlineShopping (Xiao et al., 2023) ['cmn'] Classification s2s None None
OnlineStoreReviewSentimentClassification ['ara'] Classification s2s [Reviews, Written] None None
OpusparcusPC (Mathias Creutz, 2018) ['deu', 'eng', 'fin', 'fra', 'rus', 'swe'] PairClassification s2s [Spoken, Spoken] None None
OralArgumentQuestionPurposeLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OverrulingLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
OxfordFlowersClassification (Nilsback et al., 2008) ['eng'] ImageClassification i2i [Reviews] {'test': 1020} {'test': {'num_samples': 1020, 'unique_num_labels': 102, 'min_image_width': 500, 'average_image_width': 618.07, 'max_image_width': 873, 'min_image_height': 500, 'average_image_height': 538.26, 'max_image_height': 928, 'labels': {'0': {'count': 9}, '1': {'count': 9}, '2': {'count': 10}, '3': {'count': 9}, '4': {'count': 11}, '5': {'count': 11}, '6': {'count': 10}, '7': {'count': 10}, '8': {'count': 11}, '9': {'count': 10}, '10': {'count': 10}, '11': {'count': 9}, '12': {'count': 10}, '13': {'count': 10}, '14': {'count': 10}, '15': {'count': 9}, '16': {'count': 11}, '17': {'count': 11}, '18': {'count': 10}, '19': {'count': 9}, '20': {'count': 9}, '21': {'count': 10}, '22': {'count': 11}, '23': {'count': 11}, '24': {'count': 10}, '25': {'count': 11}, '26': {'count': 10}, '27': {'count': 9}, '28': {'count': 11}, '29': {'count': 10}, '30': {'count': 10}, '31': {'count': 9}, '32': {'count': 10}, '33': {'count': 10}, '34': {'count': 10}, '35': {'count': 11}, '36': {'count': 9}, '37': {'count': 10}, '38': {'count': 10}, '39': {'count': 11}, '40': {'count': 10}, '41': {'count': 10}, '42': {'count': 11}, '43': {'count': 10}, '44': {'count': 10}, '45': {'count': 10}, '46': {'count': 10}, '47': {'count': 9}, '48': {'count': 10}, '49': {'count': 11}, '50': {'count': 10}, '51': {'count': 10}, '52': {'count': 10}, '53': {'count': 10}, '54': {'count': 10}, '55': {'count': 10}, '56': {'count': 10}, '57': {'count': 11}, '58': {'count': 10}, '59': {'count': 10}, '60': {'count': 10}, '61': {'count': 10}, '62': {'count': 9}, '63': {'count': 10}, '64': {'count': 10}, '65': {'count': 9}, '66': {'count': 11}, '67': {'count': 10}, '68': {'count': 11}, '69': {'count': 9}, '70': {'count': 9}, '71': {'count': 10}, '72': {'count': 10}, '73': {'count': 10}, '74': {'count': 10}, '75': {'count': 10}, '76': {'count': 10}, '77': {'count': 10}, '78': {'count': 9}, '79': {'count': 10}, '80': {'count': 10}, '81': {'count': 11}, '82': {'count': 10}, '83': {'count': 9}, '84': {'count': 11}, '85': {'count': 10}, '86': {'count': 10}, '87': {'count': 10}, '88': {'count': 10}, '89': {'count': 10}, '90': {'count': 10}, '91': {'count': 10}, '92': {'count': 10}, '93': {'count': 9}, '94': {'count': 9}, '95': {'count': 10}, '96': {'count': 10}, '97': {'count': 10}, '98': {'count': 10}, '99': {'count': 10}, '100': {'count': 10}, '101': {'count': 11}}}}
OxfordPets (Parkhi et al., 2012) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 3669} {'test': {'num_samples': 3669, 'unique_num_labels': 37, 'min_image_width': 137, 'average_image_width': 443.46, 'max_image_width': 1646, 'min_image_height': 103, 'average_image_height': 399.38, 'max_image_height': 2160, 'labels': {'0': {'count': 98}, '1': {'count': 100}, '2': {'count': 100}, '3': {'count': 100}, '4': {'count': 100}, '5': {'count': 100}, '6': {'count': 100}, '7': {'count': 88}, '8': {'count': 99}, '9': {'count': 100}, '10': {'count': 100}, '11': {'count': 97}, '12': {'count': 100}, '13': {'count': 100}, '14': {'count': 100}, '15': {'count': 100}, '16': {'count': 100}, '17': {'count': 100}, '18': {'count': 99}, '19': {'count': 100}, '20': {'count': 100}, '21': {'count': 100}, '22': {'count': 100}, '23': {'count': 100}, '24': {'count': 100}, '25': {'count': 100}, '26': {'count': 100}, '27': {'count': 100}, '28': {'count': 100}, '29': {'count': 100}, '30': {'count': 99}, '31': {'count': 100}, '32': {'count': 100}, '33': {'count': 100}, '34': {'count': 89}, '35': {'count': 100}, '36': {'count': 100}}}}
OxfordPetsZeroShot (Subhransu Maji, 2013) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 3669} {'test': {'num_samples': 3669, 'unique_num_labels': 37, 'min_image_width': 137, 'average_image_width': 443.46, 'max_image_width': 1646, 'min_image_height': 103, 'average_image_height': 399.38, 'max_image_height': 2160, 'min_label_text_length': 32, 'average_label_text_length': 40.68, 'max_label_text_length': 55, 'labels': {'0': {'count': 98}, '1': {'count': 100}, '2': {'count': 100}, '3': {'count': 100}, '4': {'count': 100}, '5': {'count': 100}, '6': {'count': 100}, '7': {'count': 88}, '8': {'count': 99}, '9': {'count': 100}, '10': {'count': 100}, '11': {'count': 97}, '12': {'count': 100}, '13': {'count': 100}, '14': {'count': 100}, '15': {'count': 100}, '16': {'count': 100}, '17': {'count': 100}, '18': {'count': 99}, '19': {'count': 100}, '20': {'count': 100}, '21': {'count': 100}, '22': {'count': 100}, '23': {'count': 100}, '24': {'count': 100}, '25': {'count': 100}, '26': {'count': 100}, '27': {'count': 100}, '28': {'count': 100}, '29': {'count': 100}, '30': {'count': 99}, '31': {'count': 100}, '32': {'count': 100}, '33': {'count': 100}, '34': {'count': 89}, '35': {'count': 100}, '36': {'count': 100}}}}
PAC (Łukasz Augustyniak, 2022) ['pol'] Classification p2p [Legal, Written] None None
PAWSX (Shitao Xiao, 2024) ['cmn'] STS s2s None None
PIQA (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
PROALegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
PSC ['pol'] PairClassification s2s [News, Written] None None
ParsinluEntail ['fas'] PairClassification s2s None None
ParsinluQueryParaphPC ['fas'] PairClassification s2s None None
PatchCamelyon ['eng'] ImageClassification i2i [Medical] {'test': 32768} {'test': {'num_samples': 32768, 'unique_num_labels': 2, 'min_image_width': 96, 'average_image_width': 96.0, 'max_image_width': 96, 'min_image_height': 96, 'average_image_height': 96.0, 'max_image_height': 96, 'labels': {'0': {'count': 16391}, '1': {'count': 16377}}}}
PatchCamelyonZeroShot ['eng'] ZeroShotClassification i2t [Medical] {'test': 32768} {'test': {'num_samples': 32768, 'unique_num_labels': 2, 'min_image_width': 96, 'average_image_width': 96.0, 'max_image_width': 96, 'min_image_height': 96, 'average_image_height': 96.0, 'max_image_height': 96, 'min_label_text_length': 35, 'average_label_text_length': 52.0, 'max_label_text_length': 69, 'labels': {'0': {'count': 16391}, '1': {'count': 16377}}}}
PatentClassification ['eng'] Classification s2s [Legal, Written] None None
PawsXPairClassification (Yinfei Yang, 2019) ['cmn', 'deu', 'eng', 'fra', 'jpn', 'kor', 'spa'] PairClassification s2s [Encyclopaedic, Web, Written] {'test': 14000, 'validation': 14000} {'test': {'num_samples': 14000, 'number_of_characters': 2551922, 'min_sentence1_length': 2, 'avg_sentence1_length': 91.18, 'max_sentence1_length': 268, 'unique_sentence1': 13404, 'min_sentence2_length': 2, 'avg_sentence2_length': 91.1, 'max_sentence2_length': 247, 'unique_sentence2': 13462, 'unique_labels': 2, 'labels': {'1': {'count': 6285}, '0': {'count': 7715}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 478034, 'min_sentence1_length': 2, 'avg_sentence1_length': 119.78, 'max_sentence1_length': 268, 'unique_sentence1': 1934, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.24, 'max_sentence2_length': 235, 'unique_sentence2': 1938, 'unique_labels': 2, 'labels': {'1': {'count': 895}, '0': {'count': 1105}}}, 'en': {'num_samples': 2000, 'number_of_characters': 454362, 'min_sentence1_length': 25, 'avg_sentence1_length': 113.76, 'max_sentence1_length': 209, 'unique_sentence1': 1761, 'min_sentence2_length': 25, 'avg_sentence2_length': 113.42, 'max_sentence2_length': 209, 'unique_sentence2': 1800, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'es': {'num_samples': 2000, 'number_of_characters': 471226, 'min_sentence1_length': 2, 'avg_sentence1_length': 117.81, 'max_sentence1_length': 226, 'unique_sentence1': 1955, 'min_sentence2_length': 22, 'avg_sentence2_length': 117.8, 'max_sentence2_length': 233, 'unique_sentence2': 1959, 'unique_labels': 2, 'labels': {'1': {'count': 907}, '0': {'count': 1093}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 480033, 'min_sentence1_length': 2, 'avg_sentence1_length': 120.03, 'max_sentence1_length': 238, 'unique_sentence1': 1954, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.99, 'max_sentence2_length': 247, 'unique_sentence2': 1953, 'unique_labels': 2, 'labels': {'1': {'count': 903}, '0': {'count': 1097}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 235106, 'min_sentence1_length': 2, 'avg_sentence1_length': 58.68, 'max_sentence1_length': 192, 'unique_sentence1': 1944, 'min_sentence2_length': 2, 'avg_sentence2_length': 58.88, 'max_sentence2_length': 198, 'unique_sentence2': 1941, 'unique_labels': 2, 'labels': {'1': {'count': 883}, '0': {'count': 1117}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 260149, 'min_sentence1_length': 2, 'avg_sentence1_length': 64.96, 'max_sentence1_length': 153, 'unique_sentence1': 1954, 'min_sentence2_length': 2, 'avg_sentence2_length': 65.11, 'max_sentence2_length': 159, 'unique_sentence2': 1969, 'unique_labels': 2, 'labels': {'1': {'count': 896}, '0': {'count': 1104}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 173012, 'min_sentence1_length': 2, 'avg_sentence1_length': 43.23, 'max_sentence1_length': 120, 'unique_sentence1': 1909, 'min_sentence2_length': 2, 'avg_sentence2_length': 43.27, 'max_sentence2_length': 113, 'unique_sentence2': 1909, 'unique_labels': 2, 'labels': {'1': {'count': 894}, '0': {'count': 1106}}}}}, 'validation': {'num_samples': 14000, 'number_of_characters': 2524625, 'min_sentence1_length': 2, 'avg_sentence1_length': 90.13, 'max_sentence1_length': 248, 'unique_sentence1': 13357, 'min_sentence2_length': 2, 'avg_sentence2_length': 90.2, 'max_sentence2_length': 275, 'unique_sentence2': 13397, 'unique_labels': 2, 'labels': {'1': {'count': 5948}, '0': {'count': 8052}}, 'hf_subset_descriptive_stats': {'de': {'num_samples': 2000, 'number_of_characters': 467643, 'min_sentence1_length': 2, 'avg_sentence1_length': 116.82, 'max_sentence1_length': 248, 'unique_sentence1': 1914, 'min_sentence2_length': 2, 'avg_sentence2_length': 117.0, 'max_sentence2_length': 275, 'unique_sentence2': 1920, 'unique_labels': 2, 'labels': {'1': {'count': 831}, '0': {'count': 1169}}}, 'en': {'num_samples': 2000, 'number_of_characters': 451931, 'min_sentence1_length': 25, 'avg_sentence1_length': 113.11, 'max_sentence1_length': 213, 'unique_sentence1': 1758, 'min_sentence2_length': 25, 'avg_sentence2_length': 112.86, 'max_sentence2_length': 213, 'unique_sentence2': 1771, 'unique_labels': 2, 'labels': {'1': {'count': 863}, '0': {'count': 1137}}}, 'es': {'num_samples': 2000, 'number_of_characters': 466112, 'min_sentence1_length': 2, 'avg_sentence1_length': 116.33, 'max_sentence1_length': 240, 'unique_sentence1': 1938, 'min_sentence2_length': 2, 'avg_sentence2_length': 116.73, 'max_sentence2_length': 241, 'unique_sentence2': 1941, 'unique_labels': 2, 'labels': {'1': {'count': 847}, '0': {'count': 1153}}}, 'fr': {'num_samples': 2000, 'number_of_characters': 478510, 'min_sentence1_length': 2, 'avg_sentence1_length': 119.5, 'max_sentence1_length': 233, 'unique_sentence1': 1933, 'min_sentence2_length': 2, 'avg_sentence2_length': 119.75, 'max_sentence2_length': 246, 'unique_sentence2': 1939, 'unique_labels': 2, 'labels': {'1': {'count': 860}, '0': {'count': 1140}}}, 'ja': {'num_samples': 2000, 'number_of_characters': 229655, 'min_sentence1_length': 2, 'avg_sentence1_length': 57.51, 'max_sentence1_length': 126, 'unique_sentence1': 1957, 'min_sentence2_length': 2, 'avg_sentence2_length': 57.32, 'max_sentence2_length': 121, 'unique_sentence2': 1969, 'unique_labels': 2, 'labels': {'1': {'count': 854}, '0': {'count': 1146}}}, 'ko': {'num_samples': 2000, 'number_of_characters': 261355, 'min_sentence1_length': 2, 'avg_sentence1_length': 65.16, 'max_sentence1_length': 178, 'unique_sentence1': 1963, 'min_sentence2_length': 2, 'avg_sentence2_length': 65.52, 'max_sentence2_length': 174, 'unique_sentence2': 1968, 'unique_labels': 2, 'labels': {'1': {'count': 840}, '0': {'count': 1160}}}, 'zh': {'num_samples': 2000, 'number_of_characters': 169419, 'min_sentence1_length': 2, 'avg_sentence1_length': 42.45, 'max_sentence1_length': 101, 'unique_sentence1': 1899, 'min_sentence2_length': 2, 'avg_sentence2_length': 42.26, 'max_sentence2_length': 120, 'unique_sentence2': 1895, 'unique_labels': 2, 'labels': {'1': {'count': 853}, '0': {'count': 1147}}}}}}
PersianFoodSentimentClassification (Mehrdad Farahani et al., 2020) ['fas'] Classification s2s [Reviews, Written] None None
PersianTextEmotion ['fas'] Classification s2s None None
PersianTextTone ['fas'] Classification s2p None None
PersianWebDocumentRetrieval ['fas'] Retrieval s2p [Web] None None
PersonalJurisdictionLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
PhincBitextMining (Srivastava et al., 2020) ['eng', 'hin'] BitextMining s2s [Social, Written] {'train': 13738} {'train': {'num_samples': 13738, 'number_of_characters': 2069457, 'unique_pairs': 13737, 'min_sentence1_length': 1, 'average_sentence1_length': 74.02, 'max_sentence1_length': 278, 'unique_sentence1': 13515, 'min_sentence2_length': 3, 'average_sentence2_length': 76.61, 'max_sentence2_length': 274, 'unique_sentence2': 13736, 'hf_subset_descriptive_stats': {'eng-eng_hin': {'num_samples': 13738, 'number_of_characters': 2069457, 'unique_pairs': 13737, 'min_sentence1_length': 1, 'average_sentence1_length': 74.02, 'max_sentence1_length': 278, 'unique_sentence1': 13515, 'min_sentence2_length': 3, 'average_sentence2_length': 76.61, 'max_sentence2_length': 274, 'unique_sentence2': 13736}}}}
PlscClusteringP2P.v2 ['pol'] Clustering s2s [Academic, Written] None None
PlscClusteringS2S.v2 ['pol'] Clustering s2s [Academic, Written] None None
PoemSentimentClassification (Emily Sheng, 2020) ['eng'] Classification s2s [Reviews, Written] None None
PolEmo2.0-IN ['pol'] Classification s2s [Social, Written] None None
PolEmo2.0-OUT ['pol'] Classification s2s [Social, Written] None None
PpcPC (Sławomir Dadas, 2022) ['pol'] PairClassification s2s [Fiction, News, Non-fiction, Social, Spoken, Web, Written] None None
PubChemAISentenceParaphrasePC (Kasmaee et al., 2024) ['eng'] PairClassification s2s [Chemistry] None None
PubChemSMILESBitextMining (Kasmaee et al., 2024) ['eng'] BitextMining s2s [Chemistry] None None
PubChemSMILESPC (Kasmaee et al., 2024) ['eng'] PairClassification s2s [Chemistry] None None
PubChemSynonymPC (Kasmaee et al., 2024) ['eng'] PairClassification s2s [Chemistry] None None
PubChemWikiPairClassification (Kasmaee et al., 2024) ['ces', 'deu', 'eng', 'fra', 'hin', 'jpn', 'kor', 'msa', 'nld', 'por', 'spa', 'tur', 'zho'] PairClassification s2s [Chemistry] None None
PubChemWikiParagraphsPC (Kasmaee et al., 2024) ['eng'] PairClassification p2p [Chemistry] None None
PublicHealthQA ['ara', 'eng', 'fra', 'kor', 'rus', 'spa', 'vie', 'zho'] Retrieval s2p [Government, Medical, Web, Written] None None
PunjabiNewsClassification (Anoop Kunchukuttan, 2020) ['pan'] Classification s2s [News, Written] None None
QBQTC ['cmn'] STS s2s None None
Quail (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
Query2Query ['fas'] STS s2s None None
Quora-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2s [Written] None None
Quora-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2s None None
Quora-PLHardNegatives (Konrad Wojtasik, 2024) ['pol'] Retrieval s2s None None
QuoraRetrieval (DataCanary et al., 2017) ['eng'] Retrieval s2s [Blog, Web, Written] None None
QuoraRetrieval-Fa ['fas'] Retrieval s2s [Web] None None
QuoraRetrievalHardNegatives (DataCanary et al., 2017) ['eng'] Retrieval s2s None None
RARbCode (Xiao et al., 2024) ['eng'] Retrieval s2p [Programming, Written] None None
RARbMath (Xiao et al., 2024) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
RESISC45 (Cheng et al., 2017) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 6300} {'test': {'num_samples': 6300, 'unique_num_labels': 45, 'min_image_width': 256, 'average_image_width': 256.0, 'max_image_width': 256, 'min_image_height': 256, 'average_image_height': 256.0, 'max_image_height': 256, 'labels': {'31': {'count': 135}, '11': {'count': 144}, '28': {'count': 135}, '43': {'count': 154}, '41': {'count': 144}, '33': {'count': 134}, '19': {'count': 130}, '16': {'count': 127}, '22': {'count': 130}, '34': {'count': 143}, '24': {'count': 164}, '0': {'count': 169}, '13': {'count': 146}, '25': {'count': 115}, '6': {'count': 132}, '36': {'count': 135}, '39': {'count': 142}, '18': {'count': 140}, '23': {'count': 147}, '37': {'count': 159}, '15': {'count': 122}, '29': {'count': 140}, '9': {'count': 159}, '27': {'count': 140}, '21': {'count': 131}, '3': {'count': 134}, '1': {'count': 162}, '32': {'count': 153}, '26': {'count': 150}, '35': {'count': 151}, '44': {'count': 118}, '30': {'count': 154}, '20': {'count': 139}, '4': {'count': 130}, '42': {'count': 127}, '40': {'count': 137}, '5': {'count': 140}, '17': {'count': 142}, '2': {'count': 123}, '38': {'count': 130}, '10': {'count': 140}, '12': {'count': 146}, '8': {'count': 146}, '7': {'count': 143}, '14': {'count': 118}}}}
RESISC45ZeroShot (Cheng et al., 2017) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 6300} {'test': {'num_samples': 6300, 'unique_num_labels': 45, 'min_image_width': 256, 'average_image_width': 256.0, 'max_image_width': 256, 'min_image_height': 256, 'average_image_height': 256.0, 'max_image_height': 256, 'min_label_text_length': 26, 'average_label_text_length': 32.16, 'max_label_text_length': 43, 'labels': {'31': {'count': 135}, '11': {'count': 144}, '28': {'count': 135}, '43': {'count': 154}, '41': {'count': 144}, '33': {'count': 134}, '19': {'count': 130}, '16': {'count': 127}, '22': {'count': 130}, '34': {'count': 143}, '24': {'count': 164}, '0': {'count': 169}, '13': {'count': 146}, '25': {'count': 115}, '6': {'count': 132}, '36': {'count': 135}, '39': {'count': 142}, '18': {'count': 140}, '23': {'count': 147}, '37': {'count': 159}, '15': {'count': 122}, '29': {'count': 140}, '9': {'count': 159}, '27': {'count': 140}, '21': {'count': 131}, '3': {'count': 134}, '1': {'count': 162}, '32': {'count': 153}, '26': {'count': 150}, '35': {'count': 151}, '44': {'count': 118}, '30': {'count': 154}, '20': {'count': 139}, '4': {'count': 130}, '42': {'count': 127}, '40': {'count': 137}, '5': {'count': 140}, '17': {'count': 142}, '2': {'count': 123}, '38': {'count': 130}, '10': {'count': 140}, '12': {'count': 146}, '8': {'count': 146}, '7': {'count': 143}, '14': {'count': 118}}}}
ROxfordEasyI2IRetrieval (Radenovi{'c, 2018) ['eng'] Any2AnyRetrieval i2i [Web] {'test': 5063} {'test': {'number_of_characters': 0, 'num_samples': 5063, 'num_queries': 70, 'num_documents': 4993, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 256, 'average_document_image_width': 256.0, 'max_document_image_width': 256, 'min_document_image_height': 256, 'average_document_image_height': 256.0, 'max_document_image_height': 256, 'num_document_images': 4993, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 70, 'min_query_image_width': 256, 'average_query_image_width': 256.0, 'max_query_image_width': 256, 'min_query_image_height': 256, 'average_query_image_height': 256.0, 'max_query_image_height': 256, 'min_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 43.27, 'max_relevant_docs_per_query': 248, 'unique_relevant_docs': 4993}}
ROxfordHardI2IRetrieval (Radenovi{'c, 2018) ['eng'] Any2AnyRetrieval i2i [Web] {'test': 5063} {'test': {'number_of_characters': 0, 'num_samples': 5063, 'num_queries': 70, 'num_documents': 4993, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 256, 'average_document_image_width': 256.0, 'max_document_image_width': 256, 'min_document_image_height': 256, 'average_document_image_height': 256.0, 'max_document_image_height': 256, 'num_document_images': 4993, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 70, 'min_query_image_width': 256, 'average_query_image_width': 256.0, 'max_query_image_width': 256, 'min_query_image_height': 256, 'average_query_image_height': 256.0, 'max_query_image_height': 256, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 35.67, 'max_relevant_docs_per_query': 284, 'unique_relevant_docs': 4993}}
ROxfordMediumI2IRetrieval (Radenovi{'c, 2018) ['eng'] Any2AnyRetrieval i2i [Web] {'test': 5063} {'test': {'number_of_characters': 0, 'num_samples': 5063, 'num_queries': 70, 'num_documents': 4993, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 256, 'average_document_image_width': 256.0, 'max_document_image_width': 256, 'min_document_image_height': 256, 'average_document_image_height': 256.0, 'max_document_image_height': 256, 'num_document_images': 4993, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 70, 'min_query_image_width': 256, 'average_query_image_width': 256.0, 'max_query_image_width': 256, 'min_query_image_height': 256, 'average_query_image_height': 256.0, 'max_query_image_height': 256, 'min_relevant_docs_per_query': 2, 'average_relevant_docs_per_query': 78.94, 'max_relevant_docs_per_query': 347, 'unique_relevant_docs': 4993}}
RP2kI2IRetrieval (Peng et al., 2020) ['eng'] Any2AnyRetrieval i2i [Web] {'test': 77643} {'test': {'number_of_characters': 0, 'num_samples': 77643, 'num_queries': 38186, 'num_documents': 39457, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 39457, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 38186, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 115.47, 'max_relevant_docs_per_query': 1069, 'unique_relevant_docs': 38181}}
RParisEasyI2IRetrieval (Radenovi{'c, 2018) ['eng'] Any2AnyRetrieval i2i [Web] {'test': 6392} {'test': {'number_of_characters': 0, 'num_samples': 6392, 'num_queries': 70, 'num_documents': 6322, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 256, 'average_document_image_width': 256.0, 'max_document_image_width': 256, 'min_document_image_height': 256, 'average_document_image_height': 256.0, 'max_document_image_height': 256, 'num_document_images': 6322, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 70, 'min_query_image_width': 256, 'average_query_image_width': 256.0, 'max_query_image_width': 256, 'min_query_image_height': 256, 'average_query_image_height': 256.0, 'max_query_image_height': 256, 'min_relevant_docs_per_query': 2, 'average_relevant_docs_per_query': 98.2, 'max_relevant_docs_per_query': 199, 'unique_relevant_docs': 6322}}
RParisHardI2IRetrieval (Radenovi{'c, 2018) ['eng'] Any2AnyRetrieval i2i [Web] {'test': 6392} {'test': {'number_of_characters': 0, 'num_samples': 6392, 'num_queries': 70, 'num_documents': 6322, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 256, 'average_document_image_width': 256.0, 'max_document_image_width': 256, 'min_document_image_height': 256, 'average_document_image_height': 256.0, 'max_document_image_height': 256, 'num_document_images': 6322, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 70, 'min_query_image_width': 256, 'average_query_image_width': 256.0, 'max_query_image_width': 256, 'min_query_image_height': 256, 'average_query_image_height': 256.0, 'max_query_image_height': 256, 'min_relevant_docs_per_query': 34, 'average_relevant_docs_per_query': 147.86, 'max_relevant_docs_per_query': 556, 'unique_relevant_docs': 6322}}
RParisMediumI2IRetrieval (Radenovi{'c, 2018) ['eng'] Any2AnyRetrieval i2i [Web] {'test': 6392} {'test': {'number_of_characters': 0, 'num_samples': 6392, 'num_queries': 70, 'num_documents': 6322, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'min_document_image_width': 256, 'average_document_image_width': 256.0, 'max_document_image_width': 256, 'min_document_image_height': 256, 'average_document_image_height': 256.0, 'max_document_image_height': 256, 'num_document_images': 6322, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 70, 'min_query_image_width': 256, 'average_query_image_width': 256.0, 'max_query_image_width': 256, 'min_query_image_height': 256, 'average_query_image_height': 256.0, 'max_query_image_height': 256, 'min_relevant_docs_per_query': 76, 'average_relevant_docs_per_query': 246.06, 'max_relevant_docs_per_query': 636, 'unique_relevant_docs': 6322}}
RTE3 ['deu', 'eng', 'fra', 'ita'] PairClassification s2s [Encyclopaedic, News, Web, Written] None None
RUParaPhraserSTS (Pivovarova et al., 2017) ['rus'] STS s2s [News, Written] None None
ReMuQIT2TRetrieval ['eng'] Any2AnyRetrieval it2t [Encyclopaedic] {'test': 142403} {'test': {'number_of_characters': 29161615, 'num_samples': 142403, 'num_queries': 3609, 'num_documents': 138794, 'min_document_length': 9, 'average_document_length': 208.19, 'max_document_length': 508, 'unique_documents': 138794, 'num_document_images': 0, 'min_query_length': 18, 'average_query_length': 73.86, 'max_query_length': 218, 'unique_queries': 3608, 'num_query_images': 3609, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3607}}
RedditClustering.v2 (Gregor Geigle and Nils Reimers and Andreas R{"u, 2021) ['eng'] Clustering s2s [Social, Web, Written] None None
RedditClusteringP2P.v2 (Gregor Geigle and Nils Reimers and Andreas R{"u, 2021) ['eng'] Clustering p2p [Social, Web, Written] {'test': 459389} {'test': {'num_samples': 459389, 'number_of_characters': 334286895, 'min_text_length': 79, 'average_text_length': 727.68, 'max_text_length': 4359, 'min_labels_per_text': 2, 'average_labels_per_text': 1.0, 'max_labels_per_text': 77908, 'unique_labels': 440, 'labels': {'FortNiteBR': {'count': 436}, 'buildapc': {'count': 8484}, 'offmychest': {'count': 570}, 'nus': {'count': 45}, 'relationship_advice': {'count': 16651}, 'premed': {'count': 201}, 'dogecoin': {'count': 8108}, 'GamingLaptops': {'count': 183}, 'asktransgender': {'count': 326}, 'MachineLearning': {'count': 61}, 'puppy101': {'count': 1597}, 'GunAccessoriesForSale': {'count': 2619}, 'Random_Acts_Of_Amazon': {'count': 1115}, 'Catholicism': {'count': 183}, 'MonsterHunter': {'count': 218}, 'tipofmypenis': {'count': 87}, 'samsung': {'count': 69}, 'PersonalFinanceCanada': {'count': 341}, 'Dyson_Sphere_Program': {'count': 55}, 'bleach': {'count': 41}, 'AmItheAsshole': {'count': 3730}, 'WallStreetbetsELITE': {'count': 328}, 'GlobalPowers': {'count': 35}, 'ABraThatFits': {'count': 159}, 'PokemonGoFriends': {'count': 1165}, 'NoMansSkyTheGame': {'count': 259}, 'masseffect': {'count': 233}, 'dating_advice': {'count': 559}, 'yoga': {'count': 50}, 'depression': {'count': 515}, 'COVID19positive': {'count': 180}, 'generationology': {'count': 37}, 'feedthebeast': {'count': 192}, 'EliteDangerous': {'count': 270}, 'alcoholicsanonymous': {'count': 93}, 'GoRVing': {'count': 35}, 'thedivision': {'count': 111}, 'breakingmom': {'count': 105}, 'AskAnAmerican': {'count': 80}, 'HypnoFair': {'count': 5}, 'JustUnsubbed': {'count': 13}, 'socialanxiety': {'count': 123}, 'dirtykikpals': {'count': 202}, 'askTO': {'count': 126}, 'AskCulinary': {'count': 108}, 'Bogleheads': {'count': 71}, 'dragonquest': {'count': 45}, 'NoContract': {'count': 30}, 'gorillaz': {'count': 14}, 'MondoGore': {'count': 8}, 'comicswap': {'count': 56}, 'VirtualYoutubers': {'count': 92}, 'Gta5Modding': {'count': 28}, 'obs': {'count': 61}, 'vcu': {'count': 9}, 'KingkillerChronicle': {'count': 17}, 'AmongUs': {'count': 41}, 'wireshark': {'count': 3}, 'Dodocodes': {'count': 46}, 'Aliexpress': {'count': 40}, 'LearnerDriverUK': {'count': 12}, 'PanicAttack': {'count': 23}, 'KassadinMains': {'count': 10}, 'islam': {'count': 93}, 'chronotrigger': {'count': 4}, 'skincareexchange': {'count': 13}, 'PokemonHome': {'count': 21}, 'survivinginfidelity': {'count': 71}, 'igcse': {'count': 21}, 'C25K': {'count': 21}, 'aorus': {'count': 2}, 'idleon': {'count': 19}, 'photography': {'count': 22}, 'cryptocoins': {'count': 7}, 'CanaryWharfBets': {'count': 7}, 'KillingEve': {'count': 7}, 'GameBuilderGarage': {'count': 16}, 'SauceSharingCommunity': {'count': 7}, 'turo': {'count': 9}, 'foodscience': {'count': 14}, 'HIMYM': {'count': 20}, 'HauntingOfHillHouse': {'count': 4}, 'GoodNotes': {'count': 8}, 'RedditWritesSeinfeld': {'count': 6}, 'AirReps': {'count': 2}, 'ADHD': {'count': 3811}, 'BuddyCrossing': {'count': 446}, 'libraryofruina': {'count': 98}, 'SluttyConfessions': {'count': 2787}, 'tipofmytongue': {'count': 7145}, 'fleshlight': {'count': 128}, 'amcstock': {'count': 13910}, 'teenagers': {'count': 77908}, 'suggestmeabook': {'count': 1540}, 'dirtypenpals': {'count': 5587}, 'MinecraftServer': {'count': 177}, 'CreditCards': {'count': 669}, 'Guitar': {'count': 10952}, 'rpg': {'count': 529}, 'NoFap': {'count': 14853}, 'lfg': {'count': 1093}, 'MarsWallStreet': {'count': 935}, 'SummonSign': {'count': 931}, 'AssassinsCreedValhala': {'count': 295}, 'hoi4': {'count': 432}, 'Coins4Sale': {'count': 260}, 'xbox': {'count': 459}, 'TooAfraidToAsk': {'count': 7404}, 'NBA2k': {'count': 553}, 'KGBTR': {'count': 943}, 'roblox': {'count': 220}, 'salesforce': {'count': 214}, 'TwoXChromosomes': {'count': 1736}, 'mechmarket': {'count': 4863}, 'Gaming_Headsets': {'count': 103}, 'pittsburgh': {'count': 189}, 'CryptoMars': {'count': 1606}, 'FridayNightFunkin': {'count': 378}, 'vaginismus': {'count': 122}, 'transpositive': {'count': 10}, 'comicbooks': {'count': 274}, 'BDSMcommunity': {'count': 185}, 'aliens': {'count': 201}, 'Scotch': {'count': 64}, 'KikRoleplay': {'count': 141}, 'Kayaking': {'count': 91}, '196': {'count': 47}, 'digimon': {'count': 140}, 'Evernote': {'count': 42}, 'logh': {'count': 22}, 'arlington': {'count': 15}, 'Adopted': {'count': 8}, 'DissonautUniverse': {'count': 4}, 'Midsommar': {'count': 12}, 'SofiawithanF': {'count': 83}, 'xmpp': {'count': 6}, 'ZombsRoyale': {'count': 16}, 'accesscontrol': {'count': 8}, 'WetlanderHumor': {'count': 2}, 'PoonamPandeyFanatics': {'count': 2}, 'screenplaychallenge': {'count': 2}, 'scatstories': {'count': 2}, 'techsupport': {'count': 290}, 'whatcarshouldIbuy': {'count': 79}, 'Stormlight_Archive': {'count': 15}, 'deadbydaylight': {'count': 126}, 'bicycling': {'count': 27}, 'oculus': {'count': 64}, 'Cartalk': {'count': 33}, 'Sims4': {'count': 43}, 'NoFeeAC': {'count': 95}, 'Crypto_com': {'count': 37}, 'ITCareerQuestions': {'count': 259}, 'aromantic': {'count': 18}, 'Revu': {'count': 3}, 'exalted': {'count': 2}, 'HilariaBaldwin': {'count': 20}, 'Testosterone': {'count': 35}, 'Screenwriting': {'count': 170}, 'LifeProTips': {'count': 49}, 'steinsgate': {'count': 13}, 'Baystreetbets': {'count': 10}, 'AskGirls': {'count': 7}, 'idlechampions': {'count': 7}, 'facebook': {'count': 17}, 'tf2trade': {'count': 4}, 'mfdoom': {'count': 3}, 'FiddlesticksMains': {'count': 2}, 'HFY': {'count': 10}, 'FiestaST': {'count': 2}, 'whatsthatbook': {'count': 994}, 'GearsOfWar': {'count': 879}, 'KazuhaMains': {'count': 175}, 'RepTime': {'count': 211}, 'AstroGaming': {'count': 141}, 'metalgearsolid': {'count': 152}, 'qBittorrent': {'count': 39}, 'ELLIPAL_Official': {'count': 24}, 'raisedbynarcissists': {'count': 4895}, 'unpopularopinion': {'count': 14901}, 'ACTrade': {'count': 5679}, 'askcarsales': {'count': 1339}, 'AskVet': {'count': 1357}, 'whowouldwin': {'count': 4493}, 'playstation': {'count': 1362}, 'anime': {'count': 6531}, 'GME': {'count': 12577}, 'DotA2': {'count': 2004}, 'cryptostreetbets': {'count': 2241}, 'MonsterHunterWorld': {'count': 698}, 'Market76': {'count': 14274}, 'DnD': {'count': 5092}, 'leagueoflegends': {'count': 3683}, 'doordash_drivers': {'count': 1626}, 'theta_network': {'count': 489}, 'exmuslim': {'count': 1369}, 'gonewildaudio': {'count': 2998}, 'conspiracy': {'count': 3587}, 'heroesofthestorm': {'count': 535}, 'FanFiction': {'count': 2782}, 'Doom': {'count': 1251}, 'texas': {'count': 269}, 'Vent': {'count': 1738}, 'selfimprovement': {'count': 1284}, 'youtubers': {'count': 706}, 'askseddit': {'count': 237}, 'boardgames': {'count': 1237}, 'bravelydefault': {'count': 347}, 'ConquerorsBlade': {'count': 238}, 'ChronicPain': {'count': 527}, 'teenagersnew': {'count': 256}, 'brasil': {'count': 1092}, 'MatthiasSubmissions': {'count': 921}, 'MarylandUnemployment': {'count': 314}, 'SaltLakeCity': {'count': 411}, 'BokunoheroFanfiction': {'count': 155}, 'BenignExistence': {'count': 125}, 'GayYoungOldDating': {'count': 156}, 'Bible': {'count': 202}, 'haskell': {'count': 154}, 'seduction': {'count': 400}, 'fantasywriters': {'count': 262}, 'HiveOS': {'count': 100}, 'PerkByDaylight': {'count': 15}, 'Hedgehog': {'count': 73}, 'xmen': {'count': 263}, 'HyperRP': {'count': 122}, 'emotestories': {'count': 3}, 'tutanota': {'count': 135}, 'CultoftheFranklin': {'count': 46}, 'langrisser': {'count': 62}, 'CozyGrove': {'count': 61}, 'Sverigesforsvarsmakt': {'count': 12}, 'silverbugbets': {'count': 21}, 'WreckingBallMains': {'count': 5}, 'capitalism_in_decay': {'count': 8}, 'paintdotnet': {'count': 11}, 'u_mawadom118': {'count': 4}, 'xboxfindfriends': {'count': 2}, 'CPTSD': {'count': 540}, 'destiny2': {'count': 318}, 'Wallstreetsilver': {'count': 1013}, 'DestinyTheGame': {'count': 1107}, 'blackopscoldwar': {'count': 400}, 'InstacartShoppers': {'count': 202}, 'RocketLeagueExchange': {'count': 832}, 'apexlegends': {'count': 3265}, 'kansascity': {'count': 53}, 'namenerds': {'count': 235}, 'help': {'count': 152}, 'Kengan_Ashura': {'count': 132}, 'thetagang': {'count': 165}, 'GameSale': {'count': 262}, 'Reduction': {'count': 109}, 'sex': {'count': 906}, 'bostonr4r': {'count': 75}, 'LegendsOfRuneterra': {'count': 231}, 'overlord': {'count': 48}, 'madisonwi': {'count': 53}, 'steelseries': {'count': 79}, 'ClashOfClansRecruit': {'count': 214}, 'CharacterRant': {'count': 55}, 'AirForce': {'count': 94}, 'sexstories': {'count': 92}, 'NameThatSong': {'count': 162}, 'depressed': {'count': 74}, 'ibs': {'count': 150}, '40kLore': {'count': 269}, 'podcasts': {'count': 88}, 'miraculousladybug': {'count': 150}, 'ask': {'count': 224}, 'EverMerge': {'count': 31}, 'TMJ': {'count': 54}, 'BitLifeApp': {'count': 39}, 'FireEmblemHeroes': {'count': 100}, 'software': {'count': 62}, 'ShieldAndroidTV': {'count': 70}, 'GriefSupport': {'count': 125}, 'onewheel': {'count': 37}, 'MensRights': {'count': 80}, 'nhl': {'count': 22}, 'ClashOfClans': {'count': 107}, 'ps3homebrew': {'count': 33}, 'LightNovels': {'count': 77}, 'redsox': {'count': 34}, 'CryptoMarkets': {'count': 44}, 'ugly': {'count': 47}, 'GCXRep': {'count': 12}, 'cscareerquestionsEU': {'count': 65}, 'MindHunter': {'count': 6}, 'starcraft2coop': {'count': 15}, 'nanocurrency': {'count': 1421}, 'ModelCars': {'count': 8}, 'UKJobs': {'count': 30}, 'Netherlands': {'count': 44}, 'clonewars': {'count': 8}, 'Julia': {'count': 11}, 'Prolactinoma': {'count': 9}, 'sofi': {'count': 11}, 'royalfamily': {'count': 6}, 'ConnecticutR4R': {'count': 8}, 'weather': {'count': 5}, 'oneui': {'count': 7}, 'KTM': {'count': 5}, 'Aerials': {'count': 3}, 'seoul': {'count': 2}, 'exjw': {'count': 3281}, 'ModernMagic': {'count': 699}, 'Paladins': {'count': 1242}, 'kdramarecommends': {'count': 1611}, 'hitbtc': {'count': 330}, 'endocrinology': {'count': 75}, 'Bath': {'count': 43}, 'NassauCountyHookups': {'count': 5}, 'feminineboys': {'count': 1248}, 'dreamsmp': {'count': 2018}, 'SquaredCircle': {'count': 2255}, 'Minecraft': {'count': 8753}, 'spirituality': {'count': 1809}, 'Eldenring': {'count': 1471}, 'Sat': {'count': 1172}, 'bonnaroo': {'count': 194}, 'gardening': {'count': 1892}, 'Unemployment': {'count': 6185}, 'mac': {'count': 1847}, 'Bestbuy': {'count': 437}, 'quittingkratom': {'count': 1081}, 'lawschooladmissions': {'count': 3436}, 'NiceHash': {'count': 2135}, 'McMaster': {'count': 815}, 'covidlonghaulers': {'count': 1299}, 'stalker': {'count': 758}, 'MLBTheShow': {'count': 2721}, 'FortniteCompetitive': {'count': 998}, 'dpdr': {'count': 514}, 'appliancerepair': {'count': 720}, 'thomasthetankengine': {'count': 207}, 'delhi': {'count': 217}, 'Huel': {'count': 300}, 'leafs': {'count': 203}, 'HotWheels': {'count': 170}, '90dayfianceuncensored': {'count': 550}, 'Throwers': {'count': 142}, 'Wavyhair': {'count': 270}, 'CryptoHorde': {'count': 128}, 'ShuumatsuNoValkyrie': {'count': 453}, 'TeensMeetTeens': {'count': 432}, 'dbrand': {'count': 108}, 'SLFmeetups': {'count': 18}, '1200isplentyketo': {'count': 48}, 'passive_income': {'count': 211}, 'BroadCity': {'count': 16}, 'RevenantMain': {'count': 71}, 'extrarfl': {'count': 25}, 'AgonGame': {'count': 5}, 'FitnessDE': {'count': 3}, 'gaming': {'count': 1277}, 'livesound': {'count': 91}, 'IBO': {'count': 1896}, 'EscapefromTarkov': {'count': 1300}, 'amex': {'count': 145}, 'DMAcademy': {'count': 1411}, 'VinylCollectors': {'count': 556}, 'cardano': {'count': 716}, 'brave_browser': {'count': 159}, 'dating': {'count': 952}, 'OculusQuest': {'count': 942}, 'Superstonk': {'count': 3089}, 'MtF': {'count': 957}, 'findaleague': {'count': 207}, 'Nioh': {'count': 398}, 'IRS': {'count': 715}, 'transgendercirclejerk': {'count': 353}, 'learnmath': {'count': 489}, 'piano': {'count': 263}, 'LeagueConnect': {'count': 216}, 'eu4': {'count': 561}, 'Wordpress': {'count': 345}, 'RoleplayingForReddit': {'count': 31}, 'LOONA': {'count': 89}, 'newtothenavy': {'count': 167}, 'HaircareScience': {'count': 118}, 'appletv': {'count': 167}, 'sissypersonals': {'count': 102}, 'raleigh': {'count': 168}, 'realonlyfansreviews': {'count': 21}, 'AskGames': {'count': 49}, 'PokemonTCG': {'count': 325}, 'controlgame': {'count': 109}, 'GoogleDataStudio': {'count': 16}, 'WhiteWolfRPG': {'count': 139}, 'MECoOp': {'count': 31}, 'snuffrp': {'count': 46}, 'lockpicking': {'count': 103}, 'wicked_edge': {'count': 105}, 'BMW': {'count': 99}, 'choiceofgames': {'count': 24}, 'hisdarkmaterials': {'count': 12}, 'SakuraGakuin': {'count': 24}, 'detrans': {'count': 55}, 'Smallville': {'count': 37}, 'kingofqueens': {'count': 7}, 'JamesHoffmann': {'count': 22}, 'stashinvest': {'count': 16}, 'ABA': {'count': 79}, 'ladybusiness': {'count': 10}, 'gamegrumps': {'count': 32}, 'GodEater': {'count': 21}, 'tomorrow': {'count': 39}, 'Tomorrowland': {'count': 9}, 'BlackCountryNewRoad': {'count': 5}, 'STAYC': {'count': 3}, 'SatoshiStreetBets': {'count': 3828}, 'AskLosAngeles': {'count': 1036}, 'buildapcforme': {'count': 1689}, 'ApplyingToCollege': {'count': 10675}, 'watercooling': {'count': 1209}, 'BreakUps': {'count': 4914}, 'FIFA': {'count': 3811}, 'emacs': {'count': 712}, 'trakstocks': {'count': 691}, 'Shittyaskflying': {'count': 147}, 'AmazonFC': {'count': 1178}, 'stocks': {'count': 4610}, 'BangaloreMains': {'count': 26}, 'pokemon': {'count': 3953}, 'religion': {'count': 684}, 'cuboulder': {'count': 269}, 'self': {'count': 1688}, 'tarot': {'count': 912}, 'turtles': {'count': 49}, 'TheMagnusArchives': {'count': 300}, 'Superhero_Ideas': {'count': 34}, 'NTU': {'count': 308}, 'touhou': {'count': 623}, 'JoJolion': {'count': 50}, 'lasers': {'count': 27}, 'popperpigs': {'count': 67}, 'aggretsuko': {'count': 20}, 'Library': {'count': 5}}}}
RenderedSST2 ['eng'] ZeroShotClassification i2t [Reviews] {'test': 1821} {'test': {'num_samples': 1821, 'unique_num_labels': 2, 'min_image_width': 448, 'average_image_width': 448.0, 'max_image_width': 448, 'min_image_height': 448, 'average_image_height': 448.0, 'max_image_height': 448, 'min_label_text_length': 28, 'average_label_text_length': 28.0, 'max_label_text_length': 28, 'labels': {'0': {'count': 912}, '1': {'count': 909}}}}
RestaurantReviewSentimentClassification (ElSahar et al., 2015) ['ara'] Classification s2s [Reviews, Written] None None
RiaNewsRetrieval (Gavrilov et al., 2019) ['rus'] Retrieval s2p [News, Written] None None
RiaNewsRetrievalHardNegatives (Gavrilov et al., 2019) ['rus'] Retrieval s2p [News, Written] None None
Robust04InstructionRetrieval (Orion Weller, 2024) ['eng'] InstructionRetrieval s2p [News, Written] None None
RomaTalesBitextMining ['hun', 'rom'] BitextMining s2s [Fiction, Written] None None
RomaniBibleClustering ['rom'] Clustering p2p [Religious, Written] None None
RomanianReviewsSentiment (Anca Maria Tache, 2021) ['ron'] Classification s2s [Reviews, Written] None None
RomanianSentimentClassification (Dumitrescu et al., 2020) ['ron'] Classification s2s [Reviews, Written] None None
RonSTS (Dumitrescu et al., 2021) ['ron'] STS s2s [News, Social, Web, Written] None None
RuBQReranking (Ivan Rybin, 2021) ['rus'] Reranking s2p [Encyclopaedic, Written] None None
RuBQRetrieval (Ivan Rybin, 2021) ['rus'] Retrieval s2p [Encyclopaedic, Written] None None
RuReviewsClassification (Sergey Smetanin, 2019) ['rus'] Classification p2p [Reviews, Written] None None
RuSTSBenchmarkSTS (Philip May, 2021) ['rus'] STS s2s [News, Social, Web, Written] None None
RuSciBenchGRNTIClassification ['rus'] Classification p2p [Academic, Written] None None
RuSciBenchGRNTIClusteringP2P ['rus'] Clustering p2p [Academic, Written] {'test': 2048} {'test': {'num_samples': 2048, 'number_of_characters': 1822339, 'min_text_length': 84, 'average_text_length': 889.81, 'max_text_length': 3143, 'min_labels_per_text': 73, 'average_labels_per_text': 1.0, 'max_labels_per_text': 74, 'unique_labels': 28, 'labels': {'3': {'count': 73}, '4': {'count': 73}, '20': {'count': 73}, '9': {'count': 73}, '21': {'count': 73}, '15': {'count': 73}, '16': {'count': 74}, '2': {'count': 73}, '8': {'count': 73}, '23': {'count': 73}, '6': {'count': 73}, '24': {'count': 73}, '10': {'count': 73}, '1': {'count': 73}, '17': {'count': 74}, '14': {'count': 74}, '18': {'count': 73}, '27': {'count': 73}, '19': {'count': 73}, '22': {'count': 73}, '12': {'count': 73}, '25': {'count': 73}, '5': {'count': 74}, '0': {'count': 73}, '26': {'count': 73}, '11': {'count': 73}, '13': {'count': 73}, '7': {'count': 73}}}}
RuSciBenchOECDClassification ['rus'] Classification p2p [Academic, Written] None None
RuSciBenchOECDClusteringP2P ['rus'] Clustering p2p [Academic, Written] None None
SAMSumFa ['fas'] BitextMining s2p [Spoken] None None
SCDBPAccountabilityLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDBPAuditsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDBPCertificationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDBPTrainingLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDBPVerificationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDDAccountabilityLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDDAuditsLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDDCertificationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDDTrainingLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCDDVerificationLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
SCIDOCS (Arman Cohan, 2020) ['eng'] Retrieval s2p [Academic, Non-fiction, Written] None None
SCIDOCS-Fa ['fas'] Retrieval s2p [Academic] None None
SCIDOCS-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Academic, Non-fiction, Written] None None
SCIDOCS-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p None None
SDSEyeProtectionClassification (Kasmaee et al., 2024) ['eng'] Classification s2p [Chemistry] None None
SDSGlovesClassification (Kasmaee et al., 2024) ['eng'] Classification s2p [Chemistry] None None
SIB200Classification (Adelani et al., 2023) ['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] Classification s2s [News, Written] None None
SIB200ClusteringS2S (Adelani et al., 2023) ['ace', 'acm', 'acq', 'aeb', 'afr', 'ajp', 'aka', 'als', 'amh', 'apc', 'arb', 'ars', 'ary', 'arz', 'asm', 'ast', 'awa', 'ayr', 'azb', 'azj', 'bak', 'bam', 'ban', 'bel', 'bem', 'ben', 'bho', 'bjn', 'bod', 'bos', 'bug', 'bul', 'cat', 'ceb', 'ces', 'cjk', 'ckb', 'crh', 'cym', 'dan', 'deu', 'dik', 'dyu', 'dzo', 'ell', 'eng', 'epo', 'est', 'eus', 'ewe', 'fao', 'fij', 'fin', 'fon', 'fra', 'fur', 'fuv', 'gaz', 'gla', 'gle', 'glg', 'grn', 'guj', 'hat', 'hau', 'heb', 'hin', 'hne', 'hrv', 'hun', 'hye', 'ibo', 'ilo', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kac', 'kam', 'kan', 'kas', 'kat', 'kaz', 'kbp', 'kea', 'khk', 'khm', 'kik', 'kin', 'kir', 'kmb', 'kmr', 'knc', 'kon', 'kor', 'lao', 'lij', 'lim', 'lin', 'lit', 'lmo', 'ltg', 'ltz', 'lua', 'lug', 'luo', 'lus', 'lvs', 'mag', 'mai', 'mal', 'mar', 'min', 'mkd', 'mlt', 'mni', 'mos', 'mri', 'mya', 'nld', 'nno', 'nob', 'npi', 'nqo', 'nso', 'nus', 'nya', 'oci', 'ory', 'pag', 'pan', 'pap', 'pbt', 'pes', 'plt', 'pol', 'por', 'prs', 'quy', 'ron', 'run', 'rus', 'sag', 'san', 'sat', 'scn', 'shn', 'sin', 'slk', 'slv', 'smo', 'sna', 'snd', 'som', 'sot', 'spa', 'srd', 'srp', 'ssw', 'sun', 'swe', 'swh', 'szl', 'tam', 'taq', 'tat', 'tel', 'tgk', 'tgl', 'tha', 'tir', 'tpi', 'tsn', 'tso', 'tuk', 'tum', 'tur', 'twi', 'tzm', 'uig', 'ukr', 'umb', 'urd', 'uzn', 'vec', 'vie', 'war', 'wol', 'xho', 'ydd', 'yor', 'yue', 'zho', 'zsm', 'zul'] Clustering s2s [News, Written] None None
SICK-BR-PC ['por'] PairClassification s2s [Web, Written] None None
SICK-BR-STS ['por'] STS s2s [Web, Written] None None
SICK-E-PL ['pol'] PairClassification s2s None None
SICK-R ['eng'] STS s2s [Web, Written] None None
SICK-R-PL ['pol'] STS s2s [Web, Written] None None
SICKFr ['fra'] STS s2s None None
SIDClassification ['fas'] Classification p2p [Academic] None None
SIDClustring ['fas'] Clustering p2p [Academic] None None
SIQA (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
SKQuadRetrieval ['slk'] Retrieval s2s [Encyclopaedic] None None
SNLHierarchicalClusteringP2P (Navjord et al., 2023) ['nob'] Clustering p2p [Encyclopaedic, Non-fiction, Written] None None
SNLHierarchicalClusteringS2S (Navjord et al., 2023) ['nob'] Clustering s2s [Encyclopaedic, Non-fiction, Written] None None
SNLRetrieval (Navjord et al., 2023) ['nob'] Retrieval p2p [Encyclopaedic, Non-fiction, Written] None None
SOPI2IRetrieval (Oh Song et al., 2016) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 240106} {'test': {'number_of_characters': 0, 'num_samples': 240106, 'num_queries': 120053, 'num_documents': 120053, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 120053, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 120053, 'min_relevant_docs_per_query': 2, 'average_relevant_docs_per_query': 7.0, 'max_relevant_docs_per_query': 12, 'unique_relevant_docs': 120053}}
SRNCorpusBitextMining (Zwennicker et al., 2022) ['nld', 'srn'] BitextMining s2s [Social, Web, Written] None None
STL10 (Coates et al., 2011) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 8000} {'test': {'num_samples': 8000, 'unique_num_labels': 10, 'min_image_width': 96, 'average_image_width': 96.0, 'max_image_width': 96, 'min_image_height': 96, 'average_image_height': 96.0, 'max_image_height': 96, 'labels': {'0': {'count': 800}, '1': {'count': 800}, '2': {'count': 800}, '3': {'count': 800}, '4': {'count': 800}, '5': {'count': 800}, '6': {'count': 800}, '7': {'count': 800}, '8': {'count': 800}, '9': {'count': 800}}}}
STL10ZeroShot (Coates et al., 2011) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 8000} {'test': {'num_samples': 8000, 'unique_num_labels': 10, 'min_image_width': 96, 'average_image_width': 96.0, 'max_image_width': 96, 'min_image_height': 96, 'average_image_height': 96.0, 'max_image_height': 96, 'min_label_text_length': 17, 'average_label_text_length': 18.5, 'max_label_text_length': 22, 'labels': {'0': {'count': 800}, '1': {'count': 800}, '2': {'count': 800}, '3': {'count': 800}, '4': {'count': 800}, '5': {'count': 800}, '6': {'count': 800}, '7': {'count': 800}, '8': {'count': 800}, '9': {'count': 800}}}}
STS12 (Agirre et al., 2012) ['eng'] STS s2s [Encyclopaedic, News, Written] {'test': 3108} {'test': {'num_samples': 3108, 'number_of_characters': 402118, 'min_sentence1_length': 3, 'average_sentence1_len': 63.79, 'max_sentence1_length': 220, 'unique_sentence1': 2236, 'min_sentence2_length': 7, 'average_sentence2_len': 65.59, 'max_sentence2_length': 204, 'unique_sentence2': 2797, 'min_score': 0.0, 'avg_score': 3.51, 'max_score': 5.0}}
STS12VisualSTS (Xiao et al., 2024) ['eng'] VisualSTS(eng) i2i [Encyclopaedic, News, Written] {'test': 3108} {'test': {'num_samples': 3108, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 3.51, 'max_score': 5.0}}
STS13 (Eneko Agirre, 2013) ['eng'] STS s2s [News, Non-fiction, Web, Written] None None
STS13VisualSTS (Xiao et al., 2024) ['eng'] VisualSTS(eng) i2i [News, Non-fiction, Web, Written] {'test': 1500} {'test': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.34, 'max_score': 5.0}}
STS14 ['eng'] STS s2s [Blog, Spoken, Web] None None
STS14VisualSTS (Xiao et al., 2024) ['eng'] VisualSTS(eng) i2i [Blog, Spoken, Web] {'test': 3750} {'test': {'num_samples': 3750, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.81, 'max_score': 5.0}}
STS15 ['eng'] STS s2s [Blog, News, Spoken, Web, Written] None None
STS15VisualSTS (Xiao et al., 2024) ['eng'] VisualSTS(eng) i2i [Blog, News, Spoken, Web, Written] {'test': 3000} {'test': {'num_samples': 3000, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.41, 'max_score': 5.0}}
STS16 ['eng'] STS s2s [Blog, Spoken, Web] None None
STS16VisualSTS (Xiao et al., 2024) ['eng'] VisualSTS(eng) i2i [Blog, Spoken, Web] {'test': 1186} {'test': {'num_samples': 1186, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.41, 'max_score': 5.0}}
STS17 ['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] STS s2s [News, Web, Written] {'test': 5346} {'test': {'num_samples': 5346, 'number_of_characters': 400264, 'min_sentence1_length': 6, 'average_sentence1_len': 38.15, 'max_sentence1_length': 976, 'unique_sentence1': 4900, 'min_sentence2_length': 6, 'average_sentence2_len': 36.73, 'max_sentence2_length': 1007, 'unique_sentence2': 4470, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'number_of_characters': 183387, 'min_sentence1_length': 6, 'average_sentence1_len': 31.99, 'max_sentence1_length': 976, 'unique_sentence1': 2650, 'min_sentence2_length': 6, 'average_sentence2_len': 32.44, 'max_sentence2_length': 1007, 'unique_sentence2': 2720, 'min_score': 0.0, 'avg_score': 2.47, 'max_score': 5.0}, 'ar-ar': {'num_samples': 250, 'number_of_characters': 16247, 'min_sentence1_length': 11, 'average_sentence1_len': 32.21, 'max_sentence1_length': 99, 'unique_sentence1': 250, 'min_sentence2_length': 9, 'average_sentence2_len': 32.78, 'max_sentence2_length': 83, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.22, 'max_score': 5.0}, 'en-ar': {'num_samples': 250, 'number_of_characters': 18764, 'min_sentence1_length': 13, 'average_sentence1_len': 42.36, 'max_sentence1_length': 105, 'unique_sentence1': 250, 'min_sentence2_length': 10, 'average_sentence2_len': 32.7, 'max_sentence2_length': 104, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.14, 'max_score': 5.0}, 'en-de': {'num_samples': 250, 'number_of_characters': 22177, 'min_sentence1_length': 12, 'average_sentence1_len': 43.95, 'max_sentence1_length': 94, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 44.76, 'max_sentence2_length': 104, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-en': {'num_samples': 250, 'number_of_characters': 21669, 'min_sentence1_length': 12, 'average_sentence1_len': 43.95, 'max_sentence1_length': 94, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-tr': {'num_samples': 250, 'number_of_characters': 20879, 'min_sentence1_length': 15, 'average_sentence1_len': 41.92, 'max_sentence1_length': 101, 'unique_sentence1': 250, 'min_sentence2_length': 10, 'average_sentence2_len': 41.6, 'max_sentence2_length': 107, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.13, 'max_score': 5.0}, 'es-en': {'num_samples': 250, 'number_of_characters': 23216, 'min_sentence1_length': 12, 'average_sentence1_len': 50.84, 'max_sentence1_length': 160, 'unique_sentence1': 250, 'min_sentence2_length': 14, 'average_sentence2_len': 42.02, 'max_sentence2_length': 117, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.15, 'max_score': 5.0}, 'es-es': {'num_samples': 250, 'number_of_characters': 25265, 'min_sentence1_length': 18, 'average_sentence1_len': 49.84, 'max_sentence1_length': 136, 'unique_sentence1': 250, 'min_sentence2_length': 13, 'average_sentence2_len': 51.22, 'max_sentence2_length': 129, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.23, 'max_score': 5.0}, 'fr-en': {'num_samples': 250, 'number_of_characters': 23087, 'min_sentence1_length': 19, 'average_sentence1_len': 49.62, 'max_sentence1_length': 115, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'it-en': {'num_samples': 250, 'number_of_characters': 23188, 'min_sentence1_length': 15, 'average_sentence1_len': 50.03, 'max_sentence1_length': 113, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'nl-en': {'num_samples': 250, 'number_of_characters': 22385, 'min_sentence1_length': 14, 'average_sentence1_len': 46.82, 'max_sentence1_length': 123, 'unique_sentence1': 250, 'min_sentence2_length': 15, 'average_sentence2_len': 42.72, 'max_sentence2_length': 101, 'unique_sentence2': 250, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}}}}
STS17MultilingualVisualSTS (Xiao et al., 2024) ['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] VisualSTS(multi) i2i [News, Social, Spoken, Web, Written] {'test': 5346} {'test': {'num_samples': 5346, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0, 'hf_subset_descriptive_stats': {'ko-ko': {'num_samples': 2846, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.47, 'max_score': 5.0}, 'ar-ar': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.22, 'max_score': 5.0}, 'en-ar': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.14, 'max_score': 5.0}, 'en-de': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-en': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'en-tr': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.13, 'max_score': 5.0}, 'es-en': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.15, 'max_score': 5.0}, 'es-es': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.23, 'max_score': 5.0}, 'fr-en': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'it-en': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}, 'nl-en': {'num_samples': 250, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.28, 'max_score': 5.0}}}}
STS22.v2 ['ara', 'cmn', 'deu', 'eng', 'fra', 'ita', 'pol', 'rus', 'spa', 'tur'] STS p2p [News, Written] None None
STSB (Shitao Xiao, 2024) ['cmn'] STS s2s None None
STSBenchmark (Philip May, 2021) ['eng'] STS s2s [Blog, News, Written] None None
STSBenchmarkMultilingualSTS (Philip May, 2021) ['cmn', 'deu', 'eng', 'fra', 'ita', 'nld', 'pol', 'por', 'rus', 'spa'] STS s2s [News, Social, Spoken, Web, Written] None None
STSBenchmarkMultilingualVisualSTS (Xiao et al., 2024) ['cmn', 'deu', 'eng', 'fra', 'ita', 'nld', 'pol', 'por', 'rus', 'spa'] VisualSTS(multi) i2i [News, Social, Spoken, Web, Written] {'dev': 15000, 'test': 13790} {'dev': {'num_samples': 15000, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0, 'hf_subset_descriptive_stats': {'en': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'de': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'es': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'fr': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'it': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'nl': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'pl': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'pt': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'ru': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}, 'zh': {'num_samples': 1500, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.36, 'max_score': 5.0}}}, 'test': {'num_samples': 13790, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0, 'hf_subset_descriptive_stats': {'en': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'de': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'es': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'fr': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'it': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'nl': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'pl': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'pt': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'ru': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}, 'zh': {'num_samples': 1379, 'min_image1_width': 448, 'average_image1_width': 448.0, 'max_image1_width': 448, 'min_image1_height': 448, 'average_image1_height': 448.0, 'max_image1_height': 448, 'min_image2_width': 448, 'average_image2_width': 448.0, 'max_image2_width': 448, 'min_image2_height': 448, 'average_image2_height': 448.0, 'max_image2_height': 448, 'min_score': 0.0, 'avg_score': 2.61, 'max_score': 5.0}}}}
STSES (Agirre et al., 2015) ['spa'] STS s2s [Written] None None
SUN397 (Xiao et al., 2010) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 21750} {'test': {'num_samples': 21750, 'unique_num_labels': 397, 'min_image_width': 125, 'average_image_width': 354.22, 'max_image_width': 696, 'min_image_height': 94, 'average_image_height': 291.17, 'max_image_height': 595, 'labels': {'227': {'count': 439}, '213': {'count': 335}, '53': {'count': 23}, '350': {'count': 40}, '73': {'count': 38}, '316': {'count': 63}, '177': {'count': 80}, '25': {'count': 39}, '275': {'count': 31}, '328': {'count': 33}, '263': {'count': 47}, '239': {'count': 26}, '41': {'count': 213}, '319': {'count': 51}, '91': {'count': 16}, '95': {'count': 183}, '396': {'count': 20}, '259': {'count': 36}, '107': {'count': 167}, '381': {'count': 164}, '174': {'count': 167}, '246': {'count': 44}, '67': {'count': 31}, '374': {'count': 28}, '354': {'count': 22}, '72': {'count': 100}, '97': {'count': 32}, '256': {'count': 57}, '247': {'count': 57}, '159': {'count': 49}, '270': {'count': 135}, '133': {'count': 215}, '197': {'count': 40}, '12': {'count': 38}, '2': {'count': 226}, '115': {'count': 75}, '200': {'count': 93}, '47': {'count': 103}, '9': {'count': 37}, '22': {'count': 76}, '255': {'count': 34}, '267': {'count': 22}, '244': {'count': 93}, '85': {'count': 115}, '342': {'count': 87}, '55': {'count': 50}, '7': {'count': 41}, '337': {'count': 99}, '38': {'count': 28}, '269': {'count': 69}, '106': {'count': 15}, '298': {'count': 27}, '361': {'count': 53}, '8': {'count': 108}, '166': {'count': 47}, '280': {'count': 51}, '35': {'count': 61}, '147': {'count': 82}, '214': {'count': 26}, '284': {'count': 28}, '286': {'count': 66}, '113': {'count': 67}, '83': {'count': 38}, '82': {'count': 236}, '365': {'count': 17}, '242': {'count': 116}, '186': {'count': 38}, '87': {'count': 111}, '274': {'count': 48}, '27': {'count': 95}, '283': {'count': 22}, '4': {'count': 76}, '334': {'count': 139}, '364': {'count': 21}, '48': {'count': 408}, '311': {'count': 41}, '101': {'count': 64}, '131': {'count': 55}, '172': {'count': 31}, '355': {'count': 28}, '308': {'count': 56}, '5': {'count': 47}, '318': {'count': 155}, '86': {'count': 87}, '46': {'count': 230}, '111': {'count': 69}, '88': {'count': 54}, '23': {'count': 47}, '70': {'count': 61}, '217': {'count': 34}, '11': {'count': 76}, '193': {'count': 207}, '0': {'count': 99}, '303': {'count': 23}, '324': {'count': 47}, '377': {'count': 19}, '345': {'count': 39}, '154': {'count': 49}, '393': {'count': 68}, '152': {'count': 58}, '317': {'count': 27}, '384': {'count': 46}, '257': {'count': 38}, '294': {'count': 47}, '145': {'count': 23}, '289': {'count': 33}, '375': {'count': 19}, '57': {'count': 42}, '15': {'count': 62}, '109': {'count': 24}, '139': {'count': 24}, '66': {'count': 26}, '340': {'count': 32}, '150': {'count': 41}, '118': {'count': 105}, '333': {'count': 27}, '126': {'count': 55}, '366': {'count': 116}, '358': {'count': 151}, '251': {'count': 37}, '309': {'count': 35}, '54': {'count': 20}, '327': {'count': 38}, '3': {'count': 60}, '21': {'count': 56}, '17': {'count': 62}, '146': {'count': 84}, '94': {'count': 42}, '243': {'count': 48}, '335': {'count': 85}, '245': {'count': 141}, '279': {'count': 187}, '360': {'count': 25}, '192': {'count': 105}, '49': {'count': 31}, '230': {'count': 81}, '357': {'count': 22}, '64': {'count': 72}, '112': {'count': 26}, '338': {'count': 70}, '216': {'count': 99}, '234': {'count': 183}, '300': {'count': 153}, '188': {'count': 48}, '254': {'count': 41}, '184': {'count': 183}, '373': {'count': 47}, '221': {'count': 86}, '84': {'count': 49}, '81': {'count': 119}, '161': {'count': 97}, '352': {'count': 21}, '105': {'count': 43}, '39': {'count': 59}, '383': {'count': 40}, '341': {'count': 56}, '63': {'count': 158}, '125': {'count': 29}, '302': {'count': 83}, '262': {'count': 40}, '392': {'count': 51}, '326': {'count': 173}, '228': {'count': 93}, '339': {'count': 25}, '80': {'count': 73}, '30': {'count': 42}, '264': {'count': 112}, '56': {'count': 94}, '321': {'count': 16}, '395': {'count': 52}, '68': {'count': 45}, '211': {'count': 45}, '44': {'count': 26}, '299': {'count': 21}, '220': {'count': 35}, '61': {'count': 20}, '138': {'count': 55}, '108': {'count': 111}, '10': {'count': 35}, '386': {'count': 28}, '297': {'count': 49}, '210': {'count': 36}, '175': {'count': 77}, '260': {'count': 68}, '391': {'count': 69}, '102': {'count': 77}, '26': {'count': 44}, '232': {'count': 54}, '6': {'count': 158}, '124': {'count': 43}, '14': {'count': 23}, '201': {'count': 39}, '168': {'count': 18}, '202': {'count': 26}, '140': {'count': 31}, '261': {'count': 60}, '104': {'count': 27}, '356': {'count': 22}, '34': {'count': 147}, '225': {'count': 111}, '60': {'count': 84}, '156': {'count': 35}, '237': {'count': 45}, '268': {'count': 87}, '310': {'count': 31}, '249': {'count': 73}, '281': {'count': 46}, '75': {'count': 89}, '77': {'count': 53}, '132': {'count': 45}, '235': {'count': 42}, '336': {'count': 84}, '123': {'count': 27}, '349': {'count': 90}, '180': {'count': 49}, '378': {'count': 17}, '332': {'count': 30}, '185': {'count': 29}, '389': {'count': 60}, '382': {'count': 77}, '198': {'count': 54}, '74': {'count': 48}, '231': {'count': 85}, '76': {'count': 54}, '151': {'count': 64}, '182': {'count': 17}, '209': {'count': 39}, '344': {'count': 37}, '204': {'count': 67}, '329': {'count': 23}, '380': {'count': 91}, '388': {'count': 32}, '116': {'count': 29}, '24': {'count': 103}, '199': {'count': 33}, '369': {'count': 14}, '359': {'count': 77}, '325': {'count': 39}, '323': {'count': 34}, '162': {'count': 35}, '33': {'count': 46}, '129': {'count': 21}, '287': {'count': 30}, '155': {'count': 24}, '170': {'count': 157}, '296': {'count': 40}, '110': {'count': 102}, '304': {'count': 21}, '164': {'count': 37}, '278': {'count': 23}, '71': {'count': 18}, '194': {'count': 24}, '136': {'count': 117}, '103': {'count': 134}, '330': {'count': 26}, '347': {'count': 26}, '206': {'count': 50}, '178': {'count': 43}, '362': {'count': 26}, '119': {'count': 111}, '208': {'count': 33}, '165': {'count': 44}, '90': {'count': 36}, '167': {'count': 40}, '187': {'count': 26}, '99': {'count': 50}, '390': {'count': 64}, '205': {'count': 16}, '65': {'count': 30}, '293': {'count': 23}, '223': {'count': 19}, '96': {'count': 31}, '305': {'count': 44}, '100': {'count': 57}, '385': {'count': 18}, '78': {'count': 42}, '59': {'count': 20}, '37': {'count': 59}, '219': {'count': 76}, '212': {'count': 28}, '1': {'count': 26}, '122': {'count': 35}, '92': {'count': 62}, '43': {'count': 39}, '196': {'count': 56}, '19': {'count': 25}, '128': {'count': 35}, '376': {'count': 77}, '313': {'count': 30}, '114': {'count': 54}, '121': {'count': 31}, '169': {'count': 62}, '331': {'count': 55}, '238': {'count': 16}, '179': {'count': 31}, '127': {'count': 31}, '370': {'count': 98}, '149': {'count': 47}, '346': {'count': 41}, '250': {'count': 22}, '276': {'count': 25}, '163': {'count': 43}, '18': {'count': 33}, '282': {'count': 23}, '215': {'count': 33}, '258': {'count': 60}, '240': {'count': 29}, '233': {'count': 14}, '93': {'count': 27}, '69': {'count': 23}, '266': {'count': 26}, '387': {'count': 55}, '141': {'count': 18}, '191': {'count': 26}, '183': {'count': 42}, '271': {'count': 22}, '120': {'count': 32}, '98': {'count': 53}, '29': {'count': 34}, '28': {'count': 21}, '144': {'count': 26}, '351': {'count': 50}, '368': {'count': 20}, '314': {'count': 27}, '45': {'count': 17}, '218': {'count': 50}, '348': {'count': 25}, '157': {'count': 35}, '117': {'count': 24}, '367': {'count': 24}, '13': {'count': 31}, '363': {'count': 22}, '79': {'count': 28}, '312': {'count': 27}, '372': {'count': 29}, '189': {'count': 21}, '50': {'count': 22}, '160': {'count': 35}, '16': {'count': 39}, '222': {'count': 21}, '58': {'count': 37}, '153': {'count': 64}, '62': {'count': 21}, '290': {'count': 25}, '292': {'count': 24}, '285': {'count': 25}, '343': {'count': 32}, '301': {'count': 19}, '190': {'count': 46}, '195': {'count': 24}, '135': {'count': 30}, '315': {'count': 25}, '203': {'count': 29}, '307': {'count': 18}, '142': {'count': 25}, '173': {'count': 28}, '236': {'count': 41}, '171': {'count': 23}, '371': {'count': 17}, '130': {'count': 15}, '277': {'count': 39}, '248': {'count': 22}, '181': {'count': 35}, '40': {'count': 20}, '322': {'count': 15}, '273': {'count': 23}, '148': {'count': 23}, '295': {'count': 25}, '32': {'count': 21}, '320': {'count': 25}, '137': {'count': 32}, '253': {'count': 36}, '31': {'count': 19}, '306': {'count': 27}, '51': {'count': 19}, '52': {'count': 29}, '176': {'count': 31}, '241': {'count': 23}, '265': {'count': 32}, '394': {'count': 26}, '158': {'count': 26}, '226': {'count': 28}, '288': {'count': 21}, '353': {'count': 19}, '291': {'count': 21}, '224': {'count': 26}, '36': {'count': 38}, '20': {'count': 22}, '252': {'count': 18}, '134': {'count': 24}, '143': {'count': 21}, '207': {'count': 28}, '89': {'count': 16}, '272': {'count': 23}, '379': {'count': 24}, '229': {'count': 20}, '42': {'count': 23}}}}
SUN397ZeroShot (Xiao et al., 2010) ['eng'] ZeroShotClassification i2t [Encyclopaedic] {'test': 21750} {'test': {'num_samples': 21750, 'unique_num_labels': 397, 'min_image_width': 125, 'average_image_width': 354.22, 'max_image_width': 696, 'min_image_height': 94, 'average_image_height': 291.17, 'max_image_height': 595, 'min_label_text_length': 17, 'average_label_text_length': 25.9, 'max_label_text_length': 41, 'labels': {'227': {'count': 439}, '213': {'count': 335}, '53': {'count': 23}, '350': {'count': 40}, '73': {'count': 38}, '316': {'count': 63}, '177': {'count': 80}, '25': {'count': 39}, '275': {'count': 31}, '328': {'count': 33}, '263': {'count': 47}, '239': {'count': 26}, '41': {'count': 213}, '319': {'count': 51}, '91': {'count': 16}, '95': {'count': 183}, '396': {'count': 20}, '259': {'count': 36}, '107': {'count': 167}, '381': {'count': 164}, '174': {'count': 167}, '246': {'count': 44}, '67': {'count': 31}, '374': {'count': 28}, '354': {'count': 22}, '72': {'count': 100}, '97': {'count': 32}, '256': {'count': 57}, '247': {'count': 57}, '159': {'count': 49}, '270': {'count': 135}, '133': {'count': 215}, '197': {'count': 40}, '12': {'count': 38}, '2': {'count': 226}, '115': {'count': 75}, '200': {'count': 93}, '47': {'count': 103}, '9': {'count': 37}, '22': {'count': 76}, '255': {'count': 34}, '267': {'count': 22}, '244': {'count': 93}, '85': {'count': 115}, '342': {'count': 87}, '55': {'count': 50}, '7': {'count': 41}, '337': {'count': 99}, '38': {'count': 28}, '269': {'count': 69}, '106': {'count': 15}, '298': {'count': 27}, '361': {'count': 53}, '8': {'count': 108}, '166': {'count': 47}, '280': {'count': 51}, '35': {'count': 61}, '147': {'count': 82}, '214': {'count': 26}, '284': {'count': 28}, '286': {'count': 66}, '113': {'count': 67}, '83': {'count': 38}, '82': {'count': 236}, '365': {'count': 17}, '242': {'count': 116}, '186': {'count': 38}, '87': {'count': 111}, '274': {'count': 48}, '27': {'count': 95}, '283': {'count': 22}, '4': {'count': 76}, '334': {'count': 139}, '364': {'count': 21}, '48': {'count': 408}, '311': {'count': 41}, '101': {'count': 64}, '131': {'count': 55}, '172': {'count': 31}, '355': {'count': 28}, '308': {'count': 56}, '5': {'count': 47}, '318': {'count': 155}, '86': {'count': 87}, '46': {'count': 230}, '111': {'count': 69}, '88': {'count': 54}, '23': {'count': 47}, '70': {'count': 61}, '217': {'count': 34}, '11': {'count': 76}, '193': {'count': 207}, '0': {'count': 99}, '303': {'count': 23}, '324': {'count': 47}, '377': {'count': 19}, '345': {'count': 39}, '154': {'count': 49}, '393': {'count': 68}, '152': {'count': 58}, '317': {'count': 27}, '384': {'count': 46}, '257': {'count': 38}, '294': {'count': 47}, '145': {'count': 23}, '289': {'count': 33}, '375': {'count': 19}, '57': {'count': 42}, '15': {'count': 62}, '109': {'count': 24}, '139': {'count': 24}, '66': {'count': 26}, '340': {'count': 32}, '150': {'count': 41}, '118': {'count': 105}, '333': {'count': 27}, '126': {'count': 55}, '366': {'count': 116}, '358': {'count': 151}, '251': {'count': 37}, '309': {'count': 35}, '54': {'count': 20}, '327': {'count': 38}, '3': {'count': 60}, '21': {'count': 56}, '17': {'count': 62}, '146': {'count': 84}, '94': {'count': 42}, '243': {'count': 48}, '335': {'count': 85}, '245': {'count': 141}, '279': {'count': 187}, '360': {'count': 25}, '192': {'count': 105}, '49': {'count': 31}, '230': {'count': 81}, '357': {'count': 22}, '64': {'count': 72}, '112': {'count': 26}, '338': {'count': 70}, '216': {'count': 99}, '234': {'count': 183}, '300': {'count': 153}, '188': {'count': 48}, '254': {'count': 41}, '184': {'count': 183}, '373': {'count': 47}, '221': {'count': 86}, '84': {'count': 49}, '81': {'count': 119}, '161': {'count': 97}, '352': {'count': 21}, '105': {'count': 43}, '39': {'count': 59}, '383': {'count': 40}, '341': {'count': 56}, '63': {'count': 158}, '125': {'count': 29}, '302': {'count': 83}, '262': {'count': 40}, '392': {'count': 51}, '326': {'count': 173}, '228': {'count': 93}, '339': {'count': 25}, '80': {'count': 73}, '30': {'count': 42}, '264': {'count': 112}, '56': {'count': 94}, '321': {'count': 16}, '395': {'count': 52}, '68': {'count': 45}, '211': {'count': 45}, '44': {'count': 26}, '299': {'count': 21}, '220': {'count': 35}, '61': {'count': 20}, '138': {'count': 55}, '108': {'count': 111}, '10': {'count': 35}, '386': {'count': 28}, '297': {'count': 49}, '210': {'count': 36}, '175': {'count': 77}, '260': {'count': 68}, '391': {'count': 69}, '102': {'count': 77}, '26': {'count': 44}, '232': {'count': 54}, '6': {'count': 158}, '124': {'count': 43}, '14': {'count': 23}, '201': {'count': 39}, '168': {'count': 18}, '202': {'count': 26}, '140': {'count': 31}, '261': {'count': 60}, '104': {'count': 27}, '356': {'count': 22}, '34': {'count': 147}, '225': {'count': 111}, '60': {'count': 84}, '156': {'count': 35}, '237': {'count': 45}, '268': {'count': 87}, '310': {'count': 31}, '249': {'count': 73}, '281': {'count': 46}, '75': {'count': 89}, '77': {'count': 53}, '132': {'count': 45}, '235': {'count': 42}, '336': {'count': 84}, '123': {'count': 27}, '349': {'count': 90}, '180': {'count': 49}, '378': {'count': 17}, '332': {'count': 30}, '185': {'count': 29}, '389': {'count': 60}, '382': {'count': 77}, '198': {'count': 54}, '74': {'count': 48}, '231': {'count': 85}, '76': {'count': 54}, '151': {'count': 64}, '182': {'count': 17}, '209': {'count': 39}, '344': {'count': 37}, '204': {'count': 67}, '329': {'count': 23}, '380': {'count': 91}, '388': {'count': 32}, '116': {'count': 29}, '24': {'count': 103}, '199': {'count': 33}, '369': {'count': 14}, '359': {'count': 77}, '325': {'count': 39}, '323': {'count': 34}, '162': {'count': 35}, '33': {'count': 46}, '129': {'count': 21}, '287': {'count': 30}, '155': {'count': 24}, '170': {'count': 157}, '296': {'count': 40}, '110': {'count': 102}, '304': {'count': 21}, '164': {'count': 37}, '278': {'count': 23}, '71': {'count': 18}, '194': {'count': 24}, '136': {'count': 117}, '103': {'count': 134}, '330': {'count': 26}, '347': {'count': 26}, '206': {'count': 50}, '178': {'count': 43}, '362': {'count': 26}, '119': {'count': 111}, '208': {'count': 33}, '165': {'count': 44}, '90': {'count': 36}, '167': {'count': 40}, '187': {'count': 26}, '99': {'count': 50}, '390': {'count': 64}, '205': {'count': 16}, '65': {'count': 30}, '293': {'count': 23}, '223': {'count': 19}, '96': {'count': 31}, '305': {'count': 44}, '100': {'count': 57}, '385': {'count': 18}, '78': {'count': 42}, '59': {'count': 20}, '37': {'count': 59}, '219': {'count': 76}, '212': {'count': 28}, '1': {'count': 26}, '122': {'count': 35}, '92': {'count': 62}, '43': {'count': 39}, '196': {'count': 56}, '19': {'count': 25}, '128': {'count': 35}, '376': {'count': 77}, '313': {'count': 30}, '114': {'count': 54}, '121': {'count': 31}, '169': {'count': 62}, '331': {'count': 55}, '238': {'count': 16}, '179': {'count': 31}, '127': {'count': 31}, '370': {'count': 98}, '149': {'count': 47}, '346': {'count': 41}, '250': {'count': 22}, '276': {'count': 25}, '163': {'count': 43}, '18': {'count': 33}, '282': {'count': 23}, '215': {'count': 33}, '258': {'count': 60}, '240': {'count': 29}, '233': {'count': 14}, '93': {'count': 27}, '69': {'count': 23}, '266': {'count': 26}, '387': {'count': 55}, '141': {'count': 18}, '191': {'count': 26}, '183': {'count': 42}, '271': {'count': 22}, '120': {'count': 32}, '98': {'count': 53}, '29': {'count': 34}, '28': {'count': 21}, '144': {'count': 26}, '351': {'count': 50}, '368': {'count': 20}, '314': {'count': 27}, '45': {'count': 17}, '218': {'count': 50}, '348': {'count': 25}, '157': {'count': 35}, '117': {'count': 24}, '367': {'count': 24}, '13': {'count': 31}, '363': {'count': 22}, '79': {'count': 28}, '312': {'count': 27}, '372': {'count': 29}, '189': {'count': 21}, '50': {'count': 22}, '160': {'count': 35}, '16': {'count': 39}, '222': {'count': 21}, '58': {'count': 37}, '153': {'count': 64}, '62': {'count': 21}, '290': {'count': 25}, '292': {'count': 24}, '285': {'count': 25}, '343': {'count': 32}, '301': {'count': 19}, '190': {'count': 46}, '195': {'count': 24}, '135': {'count': 30}, '315': {'count': 25}, '203': {'count': 29}, '307': {'count': 18}, '142': {'count': 25}, '173': {'count': 28}, '236': {'count': 41}, '171': {'count': 23}, '371': {'count': 17}, '130': {'count': 15}, '277': {'count': 39}, '248': {'count': 22}, '181': {'count': 35}, '40': {'count': 20}, '322': {'count': 15}, '273': {'count': 23}, '148': {'count': 23}, '295': {'count': 25}, '32': {'count': 21}, '320': {'count': 25}, '137': {'count': 32}, '253': {'count': 36}, '31': {'count': 19}, '306': {'count': 27}, '51': {'count': 19}, '52': {'count': 29}, '176': {'count': 31}, '241': {'count': 23}, '265': {'count': 32}, '394': {'count': 26}, '158': {'count': 26}, '226': {'count': 28}, '288': {'count': 21}, '353': {'count': 19}, '291': {'count': 21}, '224': {'count': 26}, '36': {'count': 38}, '20': {'count': 22}, '252': {'count': 18}, '134': {'count': 24}, '143': {'count': 21}, '207': {'count': 28}, '89': {'count': 16}, '272': {'count': 23}, '379': {'count': 24}, '229': {'count': 20}, '42': {'count': 23}}}}
SadeemQuestionRetrieval ['ara'] Retrieval s2p [Written, Written] None None
SanskritShlokasClassification ['san'] Classification s2s [Religious, Written] None None
ScalaClassification ['dan', 'nno', 'nob', 'swe'] Classification s2s [Blog, Fiction, News, Non-fiction, Spoken, Web, Written] None None
SciDocsRR ['eng'] Reranking s2s [Academic, Non-fiction, Written] None None
SciFact (Arman Cohan, 2020) ['eng'] Retrieval s2p [Academic, Medical, Written] None None
SciFact-Fa ['fas'] Retrieval s2p [Academic] None None
SciFact-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Academic, Medical, Written] None None
SciFact-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p [Academic, Medical, Written] None None
SciMMIR (Siwei Wu, 2024) ['eng'] ZeroShotClassification i2t [Academic] {'test': 16263} {'test': {'num_samples': 16263, 'unique_num_labels': 5, 'min_image_width': 44, 'average_image_width': 706.75, 'max_image_width': 1607, 'min_image_height': 37, 'average_image_height': 396.71, 'max_image_height': 1439, 'min_label_text_length': 18, 'average_label_text_length': 22.4, 'max_label_text_length': 27, 'labels': {'0': {'count': 9488}, '4': {'count': 4229}, '3': {'count': 543}, '2': {'count': 467}, '1': {'count': 1536}}}}
SciMMIRI2TRetrieval (Wu et al., 2024) ['eng'] Any2AnyRetrieval i2t [Academic] {'test': 32526} {'test': {'number_of_characters': 4247786, 'num_samples': 32526, 'num_queries': 16263, 'num_documents': 16263, 'min_document_length': 21, 'average_document_length': 261.19, 'max_document_length': 2184, 'unique_documents': 16263, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 16263, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 16263}}
SciMMIRT2IRetrieval (Wu et al., 2024) ['eng'] Any2AnyRetrieval t2i [Academic] {'test': 32526} {'test': {'number_of_characters': 4247786, 'num_samples': 32526, 'num_queries': 16263, 'num_documents': 16263, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 16263, 'min_query_length': 21, 'average_query_length': 261.19, 'max_query_length': 2184, 'unique_queries': 16263, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 16263}}
SemRel24STS (Nedjma Ousidhoum, 2024) ['afr', 'amh', 'arb', 'arq', 'ary', 'eng', 'hau', 'hin', 'ind', 'kin', 'mar', 'tel'] STS s2s [Spoken, Written] None None
SensitiveTopicsClassification ['rus'] MultilabelClassification s2s [Social, Web, Written] None None
SentimentAnalysisHindi (Shantipriya Parida, 2023) ['hin'] Classification s2s [Reviews, Written] None None
SentimentDKSF ['fas'] Classification s2p [Reviews] None None
SinhalaNewsClassification (Nisansa de Silva, 2015) ['sin'] Classification s2s [News, Written] None None
SinhalaNewsSourceClassification (Dhananjaya et al., 2022) ['sin'] Classification s2s [News, Written] None None
SiswatiNewsClassification (Madodonga et al., 2023) ['ssw'] Classification s2s [News, Written] None None
SketchyI2IRetrieval (Ypsilantis et al., 2021) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 477886} {'test': {'number_of_characters': 0, 'num_samples': 477886, 'num_queries': 452886, 'num_documents': 25000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 25000, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 452886, 'min_relevant_docs_per_query': 100, 'average_relevant_docs_per_query': 100.0, 'max_relevant_docs_per_query': 100, 'unique_relevant_docs': 12500}}
SlovakHateSpeechClassification ['slk'] Classification s2s [Social, Written] {'test': 1319, 'train': 11870} {'test': {'num_samples': 1319, 'number_of_characters': 122279, 'num_texts_in_train': 46, 'min_text_length': 8, 'average_text_length': 92.71, 'max_text_length': 1584, 'unique_text': 1315, 'unique_labels': 2, 'labels': {'1': {'count': 360}, '0': {'count': 959}}}, 'train': {'num_samples': 11870, 'number_of_characters': 1130860, 'num_texts_in_train': None, 'min_text_length': 7, 'average_text_length': 95.27, 'max_text_length': 2112, 'unique_text': 11655, 'unique_labels': 2, 'labels': {'1': {'count': 3245}, '0': {'count': 8625}}}}
SlovakMovieReviewSentimentClassification ({�{S, 2023) ['svk'] Classification s2s [Reviews, Written] None None
SlovakSumRetrieval ['slk'] Retrieval s2s [News, Social, Web, Written] None None
SouthAfricanLangClassification (ExploreAI Academy et al., 2022) ['afr', 'eng', 'nbl', 'nso', 'sot', 'ssw', 'tsn', 'tso', 'ven', 'xho', 'zul'] Classification s2s [Non-fiction, Web, Written] None None
SpanishNewsClassification ['spa'] Classification s2s [News, Written] None None
SpanishNewsClusteringP2P ['spa'] Clustering p2p None None
SpanishPassageRetrievalS2P ['spa'] Retrieval s2p None None
SpanishPassageRetrievalS2S ['spa'] Retrieval s2s None None
SpanishSentimentClassification ['spa'] Classification s2s [Reviews, Written] None None
SpartQA (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
SprintDuplicateQuestions ['eng'] PairClassification s2s [Programming, Written] None None
StackExchangeClustering.v2 (Gregor Geigle and Nils Reimers and Andreas R{"u, 2021) ['eng'] Clustering s2s [Web, Written] None None
StackExchangeClusteringP2P.v2 (Gregor Geigle and Nils Reimers and Andreas R{"u, 2021) ['eng'] Clustering p2p [Web, Written] None None
StackOverflowDupQuestions (Xueqing Liu, 2018) ['eng'] Reranking s2s [Blog, Programming, Written] None None
StackOverflowQA (Xiangyang Li, 2024) ['eng'] Retrieval p2p [Programming, Written] {'test': 21925} {'test': {'number_of_characters': 26584028, 'num_samples': 21925, 'num_queries': 1994, 'num_documents': 19931, 'min_document_length': 61, 'average_document_length': 130.32, 'max_document_length': 22234, 'unique_documents': 19931, 'min_query_length': 5, 'average_query_length': 12029.38, 'max_query_length': 46028, 'unique_queries': 1994, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1994}}
StanfordCars (Jonathan Krause, 2013) ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 8041} {'test': {'num_samples': 8041, 'unique_num_labels': 196, 'min_image_width': 78, 'average_image_width': 701.18, 'max_image_width': 7800, 'min_image_height': 41, 'average_image_height': 483.75, 'max_image_height': 5400, 'labels': {'180': {'count': 38}, '102': {'count': 39}, '144': {'count': 44}, '186': {'count': 43}, '184': {'count': 38}, '77': {'count': 37}, '117': {'count': 41}, '164': {'count': 44}, '31': {'count': 41}, '59': {'count': 36}, '48': {'count': 37}, '107': {'count': 44}, '115': {'count': 37}, '134': {'count': 42}, '82': {'count': 40}, '50': {'count': 43}, '153': {'count': 42}, '32': {'count': 42}, '21': {'count': 42}, '150': {'count': 43}, '3': {'count': 42}, '80': {'count': 45}, '106': {'count': 44}, '190': {'count': 46}, '169': {'count': 44}, '194': {'count': 43}, '90': {'count': 38}, '4': {'count': 40}, '163': {'count': 43}, '147': {'count': 45}, '187': {'count': 43}, '43': {'count': 44}, '6': {'count': 39}, '30': {'count': 44}, '73': {'count': 43}, '29': {'count': 41}, '165': {'count': 41}, '179': {'count': 42}, '105': {'count': 41}, '2': {'count': 43}, '64': {'count': 45}, '34': {'count': 41}, '74': {'count': 44}, '84': {'count': 43}, '24': {'count': 39}, '167': {'count': 42}, '136': {'count': 43}, '133': {'count': 33}, '155': {'count': 39}, '119': {'count': 42}, '129': {'count': 41}, '127': {'count': 39}, '35': {'count': 41}, '170': {'count': 46}, '36': {'count': 38}, '63': {'count': 29}, '182': {'count': 42}, '42': {'count': 46}, '17': {'count': 42}, '75': {'count': 43}, '0': {'count': 44}, '62': {'count': 44}, '173': {'count': 41}, '16': {'count': 40}, '104': {'count': 43}, '49': {'count': 42}, '122': {'count': 44}, '81': {'count': 45}, '191': {'count': 42}, '92': {'count': 39}, '145': {'count': 43}, '95': {'count': 41}, '54': {'count': 39}, '114': {'count': 45}, '112': {'count': 42}, '151': {'count': 35}, '91': {'count': 40}, '188': {'count': 40}, '20': {'count': 42}, '33': {'count': 44}, '86': {'count': 44}, '128': {'count': 38}, '142': {'count': 40}, '19': {'count': 46}, '177': {'count': 41}, '11': {'count': 36}, '45': {'count': 43}, '60': {'count': 43}, '8': {'count': 41}, '56': {'count': 37}, '28': {'count': 42}, '120': {'count': 44}, '5': {'count': 44}, '85': {'count': 42}, '68': {'count': 38}, '22': {'count': 39}, '108': {'count': 44}, '89': {'count': 41}, '132': {'count': 42}, '125': {'count': 42}, '137': {'count': 39}, '158': {'count': 36}, '58': {'count': 44}, '123': {'count': 39}, '52': {'count': 44}, '27': {'count': 41}, '13': {'count': 42}, '70': {'count': 35}, '25': {'count': 34}, '185': {'count': 38}, '171': {'count': 44}, '9': {'count': 33}, '40': {'count': 35}, '178': {'count': 45}, '44': {'count': 32}, '97': {'count': 46}, '87': {'count': 39}, '159': {'count': 44}, '146': {'count': 44}, '51': {'count': 41}, '121': {'count': 40}, '1': {'count': 32}, '160': {'count': 48}, '78': {'count': 48}, '109': {'count': 43}, '103': {'count': 42}, '174': {'count': 30}, '181': {'count': 46}, '23': {'count': 45}, '111': {'count': 45}, '166': {'count': 47}, '172': {'count': 43}, '66': {'count': 38}, '192': {'count': 41}, '148': {'count': 42}, '72': {'count': 44}, '141': {'count': 32}, '71': {'count': 45}, '7': {'count': 45}, '152': {'count': 44}, '183': {'count': 40}, '98': {'count': 27}, '94': {'count': 45}, '126': {'count': 41}, '100': {'count': 42}, '131': {'count': 43}, '116': {'count': 42}, '39': {'count': 39}, '149': {'count': 36}, '101': {'count': 39}, '139': {'count': 42}, '69': {'count': 42}, '12': {'count': 41}, '14': {'count': 43}, '96': {'count': 42}, '41': {'count': 34}, '189': {'count': 43}, '10': {'count': 38}, '140': {'count': 34}, '26': {'count': 35}, '57': {'count': 44}, '88': {'count': 44}, '67': {'count': 40}, '93': {'count': 43}, '193': {'count': 45}, '161': {'count': 45}, '118': {'count': 68}, '110': {'count': 42}, '154': {'count': 42}, '138': {'count': 42}, '143': {'count': 46}, '61': {'count': 37}, '176': {'count': 44}, '113': {'count': 45}, '18': {'count': 40}, '53': {'count': 40}, '47': {'count': 42}, '157': {'count': 29}, '168': {'count': 38}, '124': {'count': 43}, '79': {'count': 43}, '130': {'count': 42}, '46': {'count': 35}, '55': {'count': 46}, '195': {'count': 40}, '38': {'count': 36}, '37': {'count': 40}, '99': {'count': 33}, '83': {'count': 42}, '162': {'count': 36}, '135': {'count': 24}, '175': {'count': 38}, '156': {'count': 36}, '15': {'count': 43}, '65': {'count': 41}, '76': {'count': 40}}}}
StanfordCarsI2IRetrieval (Jonathan Krause, 2013) ['eng'] Any2AnyRetrieval i2i [Encyclopaedic] {'test': 16082} {'test': {'number_of_characters': 0, 'num_samples': 16082, 'num_queries': 8041, 'num_documents': 8041, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 8041, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 8041, 'min_relevant_docs_per_query': 23, 'average_relevant_docs_per_query': 40.49, 'max_relevant_docs_per_query': 67, 'unique_relevant_docs': 8041}}
StanfordCarsZeroShot (Jonathan Krause, 2013) ['eng'] ZeroShotClassification i2t [Scene] {'test': 8041} {'test': {'num_samples': 8041, 'unique_num_labels': 196, 'min_image_width': 78, 'average_image_width': 701.18, 'max_image_width': 7800, 'min_image_height': 41, 'average_image_height': 483.75, 'max_image_height': 5400, 'min_label_text_length': 29, 'average_label_text_length': 40.83, 'max_label_text_length': 68, 'labels': {'180': {'count': 38}, '102': {'count': 39}, '144': {'count': 44}, '186': {'count': 43}, '184': {'count': 38}, '77': {'count': 37}, '117': {'count': 41}, '164': {'count': 44}, '31': {'count': 41}, '59': {'count': 36}, '48': {'count': 37}, '107': {'count': 44}, '115': {'count': 37}, '134': {'count': 42}, '82': {'count': 40}, '50': {'count': 43}, '153': {'count': 42}, '32': {'count': 42}, '21': {'count': 42}, '150': {'count': 43}, '3': {'count': 42}, '80': {'count': 45}, '106': {'count': 44}, '190': {'count': 46}, '169': {'count': 44}, '194': {'count': 43}, '90': {'count': 38}, '4': {'count': 40}, '163': {'count': 43}, '147': {'count': 45}, '187': {'count': 43}, '43': {'count': 44}, '6': {'count': 39}, '30': {'count': 44}, '73': {'count': 43}, '29': {'count': 41}, '165': {'count': 41}, '179': {'count': 42}, '105': {'count': 41}, '2': {'count': 43}, '64': {'count': 45}, '34': {'count': 41}, '74': {'count': 44}, '84': {'count': 43}, '24': {'count': 39}, '167': {'count': 42}, '136': {'count': 43}, '133': {'count': 33}, '155': {'count': 39}, '119': {'count': 42}, '129': {'count': 41}, '127': {'count': 39}, '35': {'count': 41}, '170': {'count': 46}, '36': {'count': 38}, '63': {'count': 29}, '182': {'count': 42}, '42': {'count': 46}, '17': {'count': 42}, '75': {'count': 43}, '0': {'count': 44}, '62': {'count': 44}, '173': {'count': 41}, '16': {'count': 40}, '104': {'count': 43}, '49': {'count': 42}, '122': {'count': 44}, '81': {'count': 45}, '191': {'count': 42}, '92': {'count': 39}, '145': {'count': 43}, '95': {'count': 41}, '54': {'count': 39}, '114': {'count': 45}, '112': {'count': 42}, '151': {'count': 35}, '91': {'count': 40}, '188': {'count': 40}, '20': {'count': 42}, '33': {'count': 44}, '86': {'count': 44}, '128': {'count': 38}, '142': {'count': 40}, '19': {'count': 46}, '177': {'count': 41}, '11': {'count': 36}, '45': {'count': 43}, '60': {'count': 43}, '8': {'count': 41}, '56': {'count': 37}, '28': {'count': 42}, '120': {'count': 44}, '5': {'count': 44}, '85': {'count': 42}, '68': {'count': 38}, '22': {'count': 39}, '108': {'count': 44}, '89': {'count': 41}, '132': {'count': 42}, '125': {'count': 42}, '137': {'count': 39}, '158': {'count': 36}, '58': {'count': 44}, '123': {'count': 39}, '52': {'count': 44}, '27': {'count': 41}, '13': {'count': 42}, '70': {'count': 35}, '25': {'count': 34}, '185': {'count': 38}, '171': {'count': 44}, '9': {'count': 33}, '40': {'count': 35}, '178': {'count': 45}, '44': {'count': 32}, '97': {'count': 46}, '87': {'count': 39}, '159': {'count': 44}, '146': {'count': 44}, '51': {'count': 41}, '121': {'count': 40}, '1': {'count': 32}, '160': {'count': 48}, '78': {'count': 48}, '109': {'count': 43}, '103': {'count': 42}, '174': {'count': 30}, '181': {'count': 46}, '23': {'count': 45}, '111': {'count': 45}, '166': {'count': 47}, '172': {'count': 43}, '66': {'count': 38}, '192': {'count': 41}, '148': {'count': 42}, '72': {'count': 44}, '141': {'count': 32}, '71': {'count': 45}, '7': {'count': 45}, '152': {'count': 44}, '183': {'count': 40}, '98': {'count': 27}, '94': {'count': 45}, '126': {'count': 41}, '100': {'count': 42}, '131': {'count': 43}, '116': {'count': 42}, '39': {'count': 39}, '149': {'count': 36}, '101': {'count': 39}, '139': {'count': 42}, '69': {'count': 42}, '12': {'count': 41}, '14': {'count': 43}, '96': {'count': 42}, '41': {'count': 34}, '189': {'count': 43}, '10': {'count': 38}, '140': {'count': 34}, '26': {'count': 35}, '57': {'count': 44}, '88': {'count': 44}, '67': {'count': 40}, '93': {'count': 43}, '193': {'count': 45}, '161': {'count': 45}, '118': {'count': 68}, '110': {'count': 42}, '154': {'count': 42}, '138': {'count': 42}, '143': {'count': 46}, '61': {'count': 37}, '176': {'count': 44}, '113': {'count': 45}, '18': {'count': 40}, '53': {'count': 40}, '47': {'count': 42}, '157': {'count': 29}, '168': {'count': 38}, '124': {'count': 43}, '79': {'count': 43}, '130': {'count': 42}, '46': {'count': 35}, '55': {'count': 46}, '195': {'count': 40}, '38': {'count': 36}, '37': {'count': 40}, '99': {'count': 33}, '83': {'count': 42}, '162': {'count': 36}, '135': {'count': 24}, '175': {'count': 38}, '156': {'count': 36}, '15': {'count': 43}, '65': {'count': 41}, '76': {'count': 40}}}}
StatcanDialogueDatasetRetrieval ['eng', 'fra'] Retrieval s2p [Government, Web, Written] None None
SugarCrepe (Hsieh et al., 2024) ['eng'] Compositionality i2t [Encyclopaedic] {'test': 7511} {'test': {'num_samples': 7511, 'num_images': 7511, 'num_texts': 15022, 'num_unique_texts': 11844, 'min_text_length': 24, 'average_text_length': 56.49, 'max_text_length': 210}}
SummEvalFrSummarization.v2 (Fabbri et al., 2020) ['fra'] Summarization p2p [News, Written] None None
SummEvalSummarization.v2 (Fabbri et al., 2020) ['eng'] Summarization p2p [News, Written] None None
SwahiliNewsClassification ['swa'] Classification s2s [News, Written] None None
SweFaqRetrieval (Berdi{�{c, 2023) ['swe'] Retrieval s2s [Government, Non-fiction, Written] None None
SweRecClassification ['swe'] Classification s2s [Reviews, Written] None None
SwedishSentimentClassification ['swe'] Classification s2s [Reviews, Written] None None
SwednClusteringP2P (Monsen et al., 2021) ['swe'] Clustering p2p [News, Non-fiction, Written] None None
SwednClusteringS2S (Monsen et al., 2021) ['swe'] Clustering s2s [News, Non-fiction, Written] None None
SwednRetrieval (Monsen et al., 2021) ['swe'] Retrieval p2p [News, Non-fiction, Written] None None
SwissJudgementClassification (Joel Niklaus, 2022) ['deu', 'fra', 'ita'] Classification s2s [Legal, Written] None None
SynPerChatbotConvSAAnger ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSAClassification ['fas'] Classification None [Spoken] None None
SynPerChatbotConvSAFear ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSAFriendship ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSAHappiness ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSAJealousy ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSALove ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSASadness ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSASatisfaction ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSASurprise ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSAToneChatbotClassification ['fas'] Classification p2p [Spoken] None None
SynPerChatbotConvSAToneUserClassification ['fas'] Classification p2p [Spoken] None None
SynPerChatbotRAGFAQPC ['fas'] PairClassification s2p [Spoken] None None
SynPerChatbotRAGFAQRetrieval ['fas'] Retrieval s2p [Spoken] None None
SynPerChatbotRAGSumSRetrieval ['fas'] BitextMining p2p [Spoken] None None
SynPerChatbotRAGToneChatbotClassification ['fas'] Classification p2p [Spoken] None None
SynPerChatbotRAGToneUserClassification ['fas'] Classification p2p [Spoken] None None
SynPerChatbotRAGTopicsRetrieval ['fas'] Retrieval s2p [Spoken] None None
SynPerChatbotSatisfactionLevelClassification ['fas'] Classification p2p [Spoken] None None
SynPerChatbotSumSRetrieval ['fas'] BitextMining p2p [Spoken] None None
SynPerChatbotToneChatbotClassification ['fas'] Classification p2p [Spoken] None None
SynPerChatbotToneUserClassification ['fas'] Classification p2p [Spoken] None None
SynPerChatbotTopicsRetrieval ['fas'] Retrieval s2p [Spoken] None None
SynPerQAPC ['fas'] PairClassification s2p [Blog, News, Religious, Web] None None
SynPerQARetrieval ['fas'] Retrieval s2p [Web] None None
SynPerSTS ['fas'] STS s2s [Blog, News, Religious, Web] None None
SynPerTextKeywordsPC ['fas'] PairClassification s2p [Blog, News, Religious, Web] None None
SyntecReranking (Mathieu Ciancone, 2024) ['fra'] Reranking s2p [Legal, Written] None None
SyntecRetrieval (Mathieu Ciancone, 2024) ['fra'] Retrieval s2p [Legal, Written] None None
SyntheticText2SQL (Meyer et al., 2024) ['eng', 'sql'] Retrieval p2p [Programming, Written] {'test': 111702} {'test': {'number_of_characters': 14041553, 'num_samples': 111702, 'num_queries': 5851, 'num_documents': 105851, 'min_document_length': 13, 'average_document_length': 4.58, 'max_document_length': 281, 'unique_documents': 105851, 'min_query_length': 17, 'average_query_length': 2316.95, 'max_query_length': 762, 'unique_queries': 5851, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 5851}}
T2Reranking (Xiaohui Xie, 2023) ['cmn'] Reranking s2s None None
T2Retrieval (Xiaohui Xie, 2023) ['cmn'] Retrieval s2p None None
TERRa (Shavrina et al., 2020) ['rus'] PairClassification s2s [News, Web, Written] None None
TNews ['cmn'] Classification s2s None None
TRECCOVID (Kirk Roberts, 2021) ['eng'] Retrieval s2p [Academic, Medical, Written] None None
TRECCOVID-Fa ['fas'] Retrieval s2p [Medical] None None
TRECCOVID-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Academic, Medical, Written] None None
TRECCOVID-PL (Konrad Wojtasik, 2024) ['pol'] Retrieval s2p [Academic, Medical, Non-fiction, Written] None None
TUBerlinT2IRetrieval (Eitz et al., 2012) ['eng'] Any2AnyRetrieval t2i [Encyclopaedic] {'test': 20250} {'test': {'number_of_characters': 1810, 'num_samples': 20250, 'num_queries': 250, 'num_documents': 20000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 20000, 'min_query_length': 2, 'average_query_length': 7.24, 'max_query_length': 18, 'unique_queries': 250, 'num_query_images': 0, 'min_relevant_docs_per_query': 80, 'average_relevant_docs_per_query': 80.0, 'max_relevant_docs_per_query': 80, 'unique_relevant_docs': 20000}}
TV2Nordretrieval ['dan'] Retrieval p2p [News, Non-fiction, Written] None None
TamilNewsClassification (Anoop Kunchukuttan, 2020) ['tam'] Classification s2s [News, Written] None None
Tatoeba (Tatoeba community, 2021) ['afr', 'amh', 'ang', 'ara', 'arq', 'arz', 'ast', 'awa', 'aze', 'bel', 'ben', 'ber', 'bos', 'bre', 'bul', 'cat', 'cbk', 'ceb', 'ces', 'cha', 'cmn', 'cor', 'csb', 'cym', 'dan', 'deu', 'dsb', 'dtp', 'ell', 'eng', 'epo', 'est', 'eus', 'fao', 'fin', 'fra', 'fry', 'gla', 'gle', 'glg', 'gsw', 'heb', 'hin', 'hrv', 'hsb', 'hun', 'hye', 'ido', 'ile', 'ina', 'ind', 'isl', 'ita', 'jav', 'jpn', 'kab', 'kat', 'kaz', 'khm', 'kor', 'kur', 'kzj', 'lat', 'lfn', 'lit', 'lvs', 'mal', 'mar', 'max', 'mhr', 'mkd', 'mon', 'nds', 'nld', 'nno', 'nob', 'nov', 'oci', 'orv', 'pam', 'pes', 'pms', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'swg', 'swh', 'tam', 'tat', 'tel', 'tgl', 'tha', 'tuk', 'tur', 'tzl', 'uig', 'ukr', 'urd', 'uzb', 'vie', 'war', 'wuu', 'xho', 'yid', 'yue', 'zsm'] BitextMining s2s [Written] None None
TbilisiCityHallBitextMining ['eng', 'kat'] BitextMining s2s [News, Written] {'test': 3640} {'test': {'num_samples': 3640, 'number_of_characters': 572146, 'unique_pairs': 3640, 'min_sentence1_length': 13, 'average_sentence1_length': 78.59, 'max_sentence1_length': 203, 'unique_sentence1': 3636, 'min_sentence2_length': 13, 'average_sentence2_length': 78.59, 'max_sentence2_length': 203, 'unique_sentence2': 3636, 'hf_subset_descriptive_stats': {'kat_Geor-eng_Latn': {'num_samples': 1820, 'number_of_characters': 286073, 'unique_pairs': 1820, 'min_sentence1_length': 30, 'average_sentence1_length': 76.07, 'max_sentence1_length': 189, 'unique_sentence1': 1820, 'min_sentence2_length': 13, 'average_sentence2_length': 81.12, 'max_sentence2_length': 203, 'unique_sentence2': 1816}, 'eng_Latn-kat_Geor': {'num_samples': 1820, 'number_of_characters': 286073, 'unique_pairs': 1820, 'min_sentence1_length': 13, 'average_sentence1_length': 81.12, 'max_sentence1_length': 203, 'unique_sentence1': 1816, 'min_sentence2_length': 30, 'average_sentence2_length': 76.07, 'max_sentence2_length': 189, 'unique_sentence2': 1820}}}}
TelemarketingSalesRuleLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
TeluguAndhraJyotiNewsClassification ['tel'] Classification s2s [News, Written] None None
TempReasonL1 (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
TempReasonL2Context (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
TempReasonL2Fact (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
TempReasonL2Pure (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
TempReasonL3Context (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
TempReasonL3Fact (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
TempReasonL3Pure (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
TenKGnadClassification ['deu'] Classification p2p [News, Written] None None
TenKGnadClusteringP2P.v2 ['deu'] Clustering p2p [News, Non-fiction, Written] None None
TenKGnadClusteringS2S.v2 ['deu'] Clustering s2s [News, Non-fiction, Written] None None
TextualismToolDictionariesLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
TextualismToolPlainLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
ThuNewsClusteringP2P.v2 (Sun et al., 2016) ['cmn'] Clustering p2p [News, Written] None None
ThuNewsClusteringS2S.v2 (Sun et al., 2016) ['cmn'] Clustering s2s [News, Written] None None
TinyImageNetClustering ['eng'] ImageClustering i2i [Reviews] {'valid': 10000} {'valid': {'num_samples': 10000, 'unique_num_labels': 200, 'min_image_width': 64, 'average_image_width': 64.0, 'max_image_width': 64, 'min_image_height': 64, 'average_image_height': 64.0, 'max_image_height': 64, 'labels': {'0': {'count': 50}, '1': {'count': 50}, '2': {'count': 50}, '3': {'count': 50}, '4': {'count': 50}, '5': {'count': 50}, '6': {'count': 50}, '7': {'count': 50}, '8': {'count': 50}, '9': {'count': 50}, '10': {'count': 50}, '11': {'count': 50}, '12': {'count': 50}, '13': {'count': 50}, '14': {'count': 50}, '15': {'count': 50}, '16': {'count': 50}, '17': {'count': 50}, '18': {'count': 50}, '19': {'count': 50}, '20': {'count': 50}, '21': {'count': 50}, '22': {'count': 50}, '23': {'count': 50}, '24': {'count': 50}, '25': {'count': 50}, '26': {'count': 50}, '27': {'count': 50}, '28': {'count': 50}, '29': {'count': 50}, '30': {'count': 50}, '31': {'count': 50}, '32': {'count': 50}, '33': {'count': 50}, '34': {'count': 50}, '35': {'count': 50}, '36': {'count': 50}, '37': {'count': 50}, '38': {'count': 50}, '39': {'count': 50}, '40': {'count': 50}, '41': {'count': 50}, '42': {'count': 50}, '43': {'count': 50}, '44': {'count': 50}, '45': {'count': 50}, '46': {'count': 50}, '47': {'count': 50}, '48': {'count': 50}, '49': {'count': 50}, '50': {'count': 50}, '51': {'count': 50}, '52': {'count': 50}, '53': {'count': 50}, '54': {'count': 50}, '55': {'count': 50}, '56': {'count': 50}, '57': {'count': 50}, '58': {'count': 50}, '59': {'count': 50}, '60': {'count': 50}, '61': {'count': 50}, '62': {'count': 50}, '63': {'count': 50}, '64': {'count': 50}, '65': {'count': 50}, '66': {'count': 50}, '67': {'count': 50}, '68': {'count': 50}, '69': {'count': 50}, '70': {'count': 50}, '71': {'count': 50}, '72': {'count': 50}, '73': {'count': 50}, '74': {'count': 50}, '75': {'count': 50}, '76': {'count': 50}, '77': {'count': 50}, '78': {'count': 50}, '79': {'count': 50}, '80': {'count': 50}, '81': {'count': 50}, '82': {'count': 50}, '83': {'count': 50}, '84': {'count': 50}, '85': {'count': 50}, '86': {'count': 50}, '87': {'count': 50}, '88': {'count': 50}, '89': {'count': 50}, '90': {'count': 50}, '91': {'count': 50}, '92': {'count': 50}, '93': {'count': 50}, '94': {'count': 50}, '95': {'count': 50}, '96': {'count': 50}, '97': {'count': 50}, '98': {'count': 50}, '99': {'count': 50}, '100': {'count': 50}, '101': {'count': 50}, '102': {'count': 50}, '103': {'count': 50}, '104': {'count': 50}, '105': {'count': 50}, '106': {'count': 50}, '107': {'count': 50}, '108': {'count': 50}, '109': {'count': 50}, '110': {'count': 50}, '111': {'count': 50}, '112': {'count': 50}, '113': {'count': 50}, '114': {'count': 50}, '115': {'count': 50}, '116': {'count': 50}, '117': {'count': 50}, '118': {'count': 50}, '119': {'count': 50}, '120': {'count': 50}, '121': {'count': 50}, '122': {'count': 50}, '123': {'count': 50}, '124': {'count': 50}, '125': {'count': 50}, '126': {'count': 50}, '127': {'count': 50}, '128': {'count': 50}, '129': {'count': 50}, '130': {'count': 50}, '131': {'count': 50}, '132': {'count': 50}, '133': {'count': 50}, '134': {'count': 50}, '135': {'count': 50}, '136': {'count': 50}, '137': {'count': 50}, '138': {'count': 50}, '139': {'count': 50}, '140': {'count': 50}, '141': {'count': 50}, '142': {'count': 50}, '143': {'count': 50}, '144': {'count': 50}, '145': {'count': 50}, '146': {'count': 50}, '147': {'count': 50}, '148': {'count': 50}, '149': {'count': 50}, '150': {'count': 50}, '151': {'count': 50}, '152': {'count': 50}, '153': {'count': 50}, '154': {'count': 50}, '155': {'count': 50}, '156': {'count': 50}, '157': {'count': 50}, '158': {'count': 50}, '159': {'count': 50}, '160': {'count': 50}, '161': {'count': 50}, '162': {'count': 50}, '163': {'count': 50}, '164': {'count': 50}, '165': {'count': 50}, '166': {'count': 50}, '167': {'count': 50}, '168': {'count': 50}, '169': {'count': 50}, '170': {'count': 50}, '171': {'count': 50}, '172': {'count': 50}, '173': {'count': 50}, '174': {'count': 50}, '175': {'count': 50}, '176': {'count': 50}, '177': {'count': 50}, '178': {'count': 50}, '179': {'count': 50}, '180': {'count': 50}, '181': {'count': 50}, '182': {'count': 50}, '183': {'count': 50}, '184': {'count': 50}, '185': {'count': 50}, '186': {'count': 50}, '187': {'count': 50}, '188': {'count': 50}, '189': {'count': 50}, '190': {'count': 50}, '191': {'count': 50}, '192': {'count': 50}, '193': {'count': 50}, '194': {'count': 50}, '195': {'count': 50}, '196': {'count': 50}, '197': {'count': 50}, '198': {'count': 50}, '199': {'count': 50}}}}
TopiOCQA (Vaibhav Adlakha, 2022) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
TopiOCQAHardNegatives (Vaibhav Adlakha, 2022) ['eng'] Retrieval s2p [Encyclopaedic, Written] None None
Touche2020-Fa ['fas'] Retrieval s2p [Spoken] None None
Touche2020-NL (Nikolay Banar, 2024) ['nld'] Retrieval s2p [Academic, Non-fiction] None None
Touche2020Retrieval.v3 ['eng'] Retrieval s2p [Academic] {'test': 303781} {'test': {'number_of_characters': 637047138, 'num_samples': 303781, 'num_queries': 49, 'num_documents': 303732, 'min_document_length': 16, 'average_document_length': 0.01, 'max_document_length': 83, 'unique_documents': 303732, 'min_query_length': 41, 'average_query_length': 13000918.57, 'max_query_length': 105983, 'unique_queries': 49, 'min_relevant_docs_per_query': 40, 'average_relevant_docs_per_query': 58.14, 'max_relevant_docs_per_query': 87, 'unique_relevant_docs': 2732}}
ToxicChatClassification (Zi Lin, 2023) ['eng'] Classification s2s [Constructed, Written] None None
ToxicConversationsClassification (cjadams, 2019) ['eng'] Classification s2s [Social, Written] None None
TswanaNewsClassification (Vukosi Marivate, 2023) ['tsn'] Classification s2s [News, Written] None None
TurHistQuadRetrieval (Soygazi et al., 2021) ['tur'] Retrieval p2p [Academic, Encyclopaedic, Non-fiction, Written] None None
TurkicClassification ['bak', 'kaz', 'kir'] Classification s2s [News, Written] None None
TurkishMovieSentimentClassification (Erkin Demirtas, 2013) ['tur'] Classification s2s [Reviews, Written] None None
TurkishProductSentimentClassification (Erkin Demirtas, 2013) ['tur'] Classification s2s [Reviews, Written] None None
TweetEmotionClassification (Al-Khatib et al., 2018) ['ara'] Classification s2s [Social, Written] None None
TweetSarcasmClassification ['ara'] Classification s2s [Social, Written] None None
TweetSentimentClassification ['ara', 'deu', 'eng', 'fra', 'hin', 'ita', 'por', 'spa'] Classification s2s [Social, Written] None None
TweetSentimentExtractionClassification (Maggie et al., 2020) ['eng'] Classification s2s [Social, Written] None None
TweetTopicSingleClassification ['eng'] Classification s2s [News, Social, Written] None None
TwentyNewsgroupsClustering.v2 (Ken Lang, 1995) ['eng'] Clustering s2s [News, Written] {'test': 59545} {'test': {'num_samples': 59545, 'number_of_characters': 1907719, 'min_text_length': 11, 'average_text_length': 32.04, 'max_text_length': 120, 'min_labels_per_text': 2082, 'average_labels_per_text': 1.0, 'max_labels_per_text': 3236, 'unique_labels': 20, 'labels': {'12': {'count': 3137}, '6': {'count': 3070}, '0': {'count': 2613}, '2': {'count': 3155}, '10': {'count': 3220}, '17': {'count': 2986}, '14': {'count': 3106}, '13': {'count': 3055}, '1': {'count': 3056}, '16': {'count': 2911}, '9': {'count': 2984}, '3': {'count': 3070}, '15': {'count': 3090}, '7': {'count': 3036}, '5': {'count': 3124}, '11': {'count': 3236}, '18': {'count': 2483}, '8': {'count': 3090}, '19': {'count': 2082}, '4': {'count': 3041}}}}
TwitterHjerneRetrieval (Holm et al., 2024) ['dan'] Retrieval p2p [Social, Written] None None
TwitterSemEval2015 ['eng'] PairClassification s2s [Social, Written] None None
TwitterURLCorpus ['eng'] PairClassification s2s [Social, Written] {'test': 51534} {'test': {'num_samples': 51534, 'number_of_characters': 8659940, 'min_sentence1_length': 24, 'avg_sentence1_length': 79.49, 'max_sentence1_length': 126, 'unique_sentence1': 4329, 'min_sentence2_length': 6, 'avg_sentence2_length': 88.55, 'max_sentence2_length': 608, 'unique_sentence2': 41304, 'unique_labels': 2, 'labels': {'0': {'count': 38546}, '1': {'count': 12988}}}}
UCCVCommonLawLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
UCF101 (Khurram Soomro, 2012) ['eng'] ImageClassification i2i [Scene] {'test': 697222} {'test': {'num_samples': 697222, 'unique_num_labels': 101, 'min_image_width': 320, 'average_image_width': 320.12, 'max_image_width': 400, 'min_image_height': 226, 'average_image_height': 239.98, 'max_image_height': 240, 'labels': {'0': {'count': 7475}, '1': {'count': 6341}, '2': {'count': 6181}, '3': {'count': 6320}, '4': {'count': 3708}, '5': {'count': 7296}, '6': {'count': 4004}, '7': {'count': 3923}, '8': {'count': 2267}, '9': {'count': 5587}, '10': {'count': 8946}, '11': {'count': 12714}, '12': {'count': 6053}, '13': {'count': 3191}, '14': {'count': 3696}, '15': {'count': 5468}, '16': {'count': 10032}, '17': {'count': 8346}, '18': {'count': 5098}, '19': {'count': 10811}, '20': {'count': 6378}, '21': {'count': 3385}, '22': {'count': 3974}, '23': {'count': 4781}, '24': {'count': 5867}, '25': {'count': 7904}, '26': {'count': 12181}, '27': {'count': 4511}, '28': {'count': 4402}, '29': {'count': 5513}, '30': {'count': 3236}, '31': {'count': 7160}, '32': {'count': 6455}, '33': {'count': 3766}, '34': {'count': 8362}, '35': {'count': 3521}, '36': {'count': 3263}, '37': {'count': 5112}, '38': {'count': 9685}, '39': {'count': 4598}, '40': {'count': 6682}, '41': {'count': 8690}, '42': {'count': 3591}, '43': {'count': 11432}, '44': {'count': 3458}, '45': {'count': 10080}, '46': {'count': 16507}, '47': {'count': 3001}, '48': {'count': 6524}, '49': {'count': 7786}, '50': {'count': 4657}, '51': {'count': 8795}, '52': {'count': 3992}, '53': {'count': 5668}, '54': {'count': 6575}, '55': {'count': 8662}, '56': {'count': 5253}, '57': {'count': 3761}, '58': {'count': 8679}, '59': {'count': 11986}, '60': {'count': 15720}, '61': {'count': 12080}, '62': {'count': 10634}, '63': {'count': 6161}, '64': {'count': 13934}, '65': {'count': 8393}, '66': {'count': 5452}, '67': {'count': 7905}, '68': {'count': 12354}, '69': {'count': 4060}, '70': {'count': 9075}, '71': {'count': 2689}, '72': {'count': 5435}, '73': {'count': 17655}, '74': {'count': 5693}, '75': {'count': 12572}, '76': {'count': 9543}, '77': {'count': 10793}, '78': {'count': 4134}, '79': {'count': 4832}, '80': {'count': 8977}, '81': {'count': 7381}, '82': {'count': 4927}, '83': {'count': 12469}, '84': {'count': 3843}, '85': {'count': 4945}, '86': {'count': 6724}, '87': {'count': 6582}, '88': {'count': 7046}, '89': {'count': 5874}, '90': {'count': 4878}, '91': {'count': 6417}, '92': {'count': 3762}, '93': {'count': 7349}, '94': {'count': 8149}, '95': {'count': 3925}, '96': {'count': 3378}, '97': {'count': 7721}, '98': {'count': 3671}, '99': {'count': 6292}, '100': {'count': 6508}}}}
UCF101ZeroShot (Khurram Soomro, 2012) ['eng'] ZeroShotClassification i2t [Scene] {'test': 697222} {'test': {'num_samples': 697222, 'unique_num_labels': 101, 'min_image_width': 320, 'average_image_width': 320.12, 'max_image_width': 400, 'min_image_height': 226, 'average_image_height': 239.98, 'max_image_height': 240, 'min_label_text_length': 15, 'average_label_text_length': 21.77, 'max_label_text_length': 29, 'labels': {'0': {'count': 7475}, '1': {'count': 6341}, '2': {'count': 6181}, '3': {'count': 6320}, '4': {'count': 3708}, '5': {'count': 7296}, '6': {'count': 4004}, '7': {'count': 3923}, '8': {'count': 2267}, '9': {'count': 5587}, '10': {'count': 8946}, '11': {'count': 12714}, '12': {'count': 6053}, '13': {'count': 3191}, '14': {'count': 3696}, '15': {'count': 5468}, '16': {'count': 10032}, '17': {'count': 8346}, '18': {'count': 5098}, '19': {'count': 10811}, '20': {'count': 6378}, '21': {'count': 3385}, '22': {'count': 3974}, '23': {'count': 4781}, '24': {'count': 5867}, '25': {'count': 7904}, '26': {'count': 12181}, '27': {'count': 4511}, '28': {'count': 4402}, '29': {'count': 5513}, '30': {'count': 3236}, '31': {'count': 7160}, '32': {'count': 6455}, '33': {'count': 3766}, '34': {'count': 8362}, '35': {'count': 3521}, '36': {'count': 3263}, '37': {'count': 5112}, '38': {'count': 9685}, '39': {'count': 4598}, '40': {'count': 6682}, '41': {'count': 8690}, '42': {'count': 3591}, '43': {'count': 11432}, '44': {'count': 3458}, '45': {'count': 10080}, '46': {'count': 16507}, '47': {'count': 3001}, '48': {'count': 6524}, '49': {'count': 7786}, '50': {'count': 4657}, '51': {'count': 8795}, '52': {'count': 3992}, '53': {'count': 5668}, '54': {'count': 6575}, '55': {'count': 8662}, '56': {'count': 5253}, '57': {'count': 3761}, '58': {'count': 8679}, '59': {'count': 11986}, '60': {'count': 15720}, '61': {'count': 12080}, '62': {'count': 10634}, '63': {'count': 6161}, '64': {'count': 13934}, '65': {'count': 8393}, '66': {'count': 5452}, '67': {'count': 7905}, '68': {'count': 12354}, '69': {'count': 4060}, '70': {'count': 9075}, '71': {'count': 2689}, '72': {'count': 5435}, '73': {'count': 17655}, '74': {'count': 5693}, '75': {'count': 12572}, '76': {'count': 9543}, '77': {'count': 10793}, '78': {'count': 4134}, '79': {'count': 4832}, '80': {'count': 8977}, '81': {'count': 7381}, '82': {'count': 4927}, '83': {'count': 12469}, '84': {'count': 3843}, '85': {'count': 4945}, '86': {'count': 6724}, '87': {'count': 6582}, '88': {'count': 7046}, '89': {'count': 5874}, '90': {'count': 4878}, '91': {'count': 6417}, '92': {'count': 3762}, '93': {'count': 7349}, '94': {'count': 8149}, '95': {'count': 3925}, '96': {'count': 3378}, '97': {'count': 7721}, '98': {'count': 3671}, '99': {'count': 6292}, '100': {'count': 6508}}}}
UkrFormalityClassification ['ukr'] Classification s2s [News, Written] None None
UnfairTOSLegalBenchClassification (Neel Guha, 2023) ['eng'] Classification s2s [Legal, Written] None None
UrduRomanSentimentClassification (Sharf,Zareen, 2018) ['urd'] Classification s2s [Social, Written] None None
VDRMultilingualRetrieval (LlamaIndex, 2025) ['deu', 'eng', 'fra', 'ita', 'spa'] Retrieval it2it [Web] None None
VGHierarchicalClusteringP2P (Navjord et al., 2023) ['nob'] Clustering p2p [News, Non-fiction, Written] None None
VGHierarchicalClusteringS2S (Navjord et al., 2023) ['nob'] Clustering p2p [News, Non-fiction, Written] None None
VOC2007 ['eng'] ImageClassification i2i [Encyclopaedic] {'test': 4952} {'test': {'num_samples': 4952, 'min_image_width': 148, 'average_image_width': 471.25, 'max_image_width': 500, 'min_image_height': 139, 'average_image_height': 381.54, 'max_image_height': 500, 'min_labels_per_sample': 1, 'average_label_per_sample': 1.42, 'max_labels_per_sample': 5, 'unique_num_labels': 20, 'labels': {'14': {'count': 2007}, '11': {'count': 418}, '18': {'count': 259}, '17': {'count': 223}, '8': {'count': 417}, '6': {'count': 721}, '15': {'count': 224}, '10': {'count': 190}, '12': {'count': 274}, '7': {'count': 322}, '9': {'count': 127}, '5': {'count': 174}, '1': {'count': 239}, '13': {'count': 222}, '2': {'count': 282}, '19': {'count': 229}, '16': {'count': 97}, '0': {'count': 204}, '3': {'count': 172}, '4': {'count': 212}}}}
VQA2IT2TRetrieval (Goyal et al., 2017) ['eng'] Any2AnyRetrieval it2t [Web] {'test': 235951} {'test': {'number_of_characters': 6850685, 'num_samples': 235951, 'num_queries': 214354, 'num_documents': 21597, 'min_document_length': 1, 'average_document_length': 11.19, 'max_document_length': 99, 'unique_documents': 21597, 'num_document_images': 0, 'min_query_length': 10, 'average_query_length': 30.83, 'max_query_length': 100, 'unique_queries': 81565, 'num_query_images': 214354, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 21597}}
VideoRetrieval ['cmn'] Retrieval s2p None None
VidoreArxivQARetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 1000} {'test': {'number_of_characters': 49664, 'num_samples': 1000, 'num_queries': 500, 'num_documents': 500, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 500, 'min_query_length': 37, 'average_query_length': 99.33, 'max_query_length': 200, 'unique_queries': 500, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 500}}
VidoreDocVQARetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 951} {'test': {'number_of_characters': 19499, 'num_samples': 951, 'num_queries': 451, 'num_documents': 500, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 500, 'min_query_length': 13, 'average_query_length': 43.24, 'max_query_length': 128, 'unique_queries': 451, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.11, 'max_relevant_docs_per_query': 12, 'unique_relevant_docs': 500}}
VidoreInfoVQARetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 994} {'test': {'number_of_characters': 32253, 'num_samples': 994, 'num_queries': 494, 'num_documents': 500, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 500, 'min_query_length': 21, 'average_query_length': 65.29, 'max_query_length': 167, 'unique_queries': 494, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.01, 'max_relevant_docs_per_query': 5, 'unique_relevant_docs': 500}}
VidoreShiftProjectRetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 1100} {'test': {'number_of_characters': 9770, 'num_samples': 1100, 'num_queries': 100, 'num_documents': 1000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 1000, 'min_query_length': 52, 'average_query_length': 97.7, 'max_query_length': 154, 'unique_queries': 100, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 100}}
VidoreSyntheticDocQAAIRetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 1068} {'test': {'number_of_characters': 7771, 'num_samples': 1068, 'num_queries': 100, 'num_documents': 968, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 968, 'min_query_length': 30, 'average_query_length': 77.71, 'max_query_length': 142, 'unique_queries': 100, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 68}}
VidoreSyntheticDocQAEnergyRetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 1077} {'test': {'number_of_characters': 8369, 'num_samples': 1077, 'num_queries': 100, 'num_documents': 977, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 977, 'min_query_length': 36, 'average_query_length': 83.69, 'max_query_length': 154, 'unique_queries': 100, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 77}}
VidoreSyntheticDocQAGovernmentReportsRetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 1072} {'test': {'number_of_characters': 8253, 'num_samples': 1072, 'num_queries': 100, 'num_documents': 972, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 972, 'min_query_length': 24, 'average_query_length': 82.53, 'max_query_length': 157, 'unique_queries': 100, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 72}}
VidoreSyntheticDocQAHealthcareIndustryRetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 1065} {'test': {'number_of_characters': 8043, 'num_samples': 1065, 'num_queries': 100, 'num_documents': 965, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 965, 'min_query_length': 37, 'average_query_length': 80.43, 'max_query_length': 148, 'unique_queries': 100, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 65}}
VidoreTabfquadRetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 350} {'test': {'number_of_characters': 28177, 'num_samples': 350, 'num_queries': 280, 'num_documents': 70, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 70, 'min_query_length': 33, 'average_query_length': 100.63, 'max_query_length': 184, 'unique_queries': 280, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 70}}
VidoreTatdqaRetrieval (Faysse et al., 2024) ['eng'] DocumentUnderstanding t2i [Academic] {'test': 1923} {'test': {'number_of_characters': 120235, 'num_samples': 1923, 'num_queries': 1646, 'num_documents': 277, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 277, 'min_query_length': 15, 'average_query_length': 73.05, 'max_query_length': 194, 'unique_queries': 1646, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.01, 'max_relevant_docs_per_query': 10, 'unique_relevant_docs': 277}}
VieMedEVBitextMining (Nhu Vo, 2024) ['eng', 'vie'] BitextMining s2s [Medical, Written] {'test': 2048} {'test': {'num_samples': 2048, 'number_of_characters': 575910, 'unique_pairs': 2048, 'min_sentence1_length': 11, 'average_sentence1_length': 139.23, 'max_sentence1_length': 1291, 'unique_sentence1': 2048, 'min_sentence2_length': 11, 'average_sentence2_length': 141.98, 'max_sentence2_length': 1217, 'unique_sentence2': 2047}}
VieQuADRetrieval ['vie'] Retrieval s2p [Encyclopaedic, Non-fiction, Written] None None
VieStudentFeedbackClassification (Nguyen et al., 2018) ['vie'] Classification s2s [Reviews, Written] None None
VisualNewsI2TRetrieval (Liu et al., 2021) ['eng'] Any2AnyRetrieval i2t [Encyclopaedic] {'test': 557568} {'test': {'number_of_characters': 58422402, 'num_samples': 557568, 'num_queries': 20000, 'num_documents': 537568, 'min_document_length': 2, 'average_document_length': 108.68, 'max_document_length': 2751, 'unique_documents': 537568, 'num_document_images': 0, 'min_query_length': 0, 'average_query_length': 0, 'max_query_length': 0, 'unique_queries': 0, 'num_query_images': 20000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 19995}}
VisualNewsT2IRetrieval (Liu et al., 2021) ['eng'] Any2AnyRetrieval t2i [Encyclopaedic] {'test': 562241} {'test': {'number_of_characters': 2204490, 'num_samples': 562241, 'num_queries': 19995, 'num_documents': 542246, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 542246, 'min_query_length': 2, 'average_query_length': 110.25, 'max_query_length': 1356, 'unique_queries': 19995, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 20000}}
VisualSTS-b-Eng (Xiao et al., 2024) ['eng'] VisualSTS(eng) i2i [News, Social, Spoken, Web, Written] None None
VisualSTS-b-Multilingual (Xiao et al., 2024) ['cmn', 'deu', 'fra', 'ita', 'nld', 'pol', 'por', 'rus', 'spa'] VisualSTS(multi) i2i [News, Social, Spoken, Web, Written] None None
VisualSTS17Eng (Xiao et al., 2024) ['eng'] VisualSTS(eng) i2i [News, Social, Spoken, Web, Written] None None
VisualSTS17Multilingual (Xiao et al., 2024) ['ara', 'deu', 'eng', 'fra', 'ita', 'kor', 'nld', 'spa', 'tur'] VisualSTS(multi) i2i [News, Social, Spoken, Web, Written] None None
VizWizIT2TRetrieval (Gurari et al., 2018) ['eng'] Any2AnyRetrieval it2t [Web] {'test': 6410} {'test': {'number_of_characters': 181824, 'num_samples': 6410, 'num_queries': 4319, 'num_documents': 2091, 'min_document_length': 1, 'average_document_length': 14.45, 'max_document_length': 94, 'unique_documents': 2091, 'num_document_images': 0, 'min_query_length': 7, 'average_query_length': 35.1, 'max_query_length': 264, 'unique_queries': 2798, 'num_query_images': 4319, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2091}}
VoyageMMarcoReranking (Benjamin Clavié, 2023) ['jpn'] Reranking s2s [Academic, Non-fiction, Written] None None
WITT2IRetrieval (Bugliarello et al., 2022) ['ara', 'bul', 'dan', 'ell', 'eng', 'est', 'ind', 'jpn', 'kor', 'tur', 'vie'] Any2AnyMultilingualRetrieval t2i [Encyclopaedic, Written] {'test': 18137} {'test': {'number_of_characters': 506601, 'num_samples': 18137, 'num_queries': 9584, 'num_documents': 8553, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 8553, 'min_query_length': 9, 'average_query_length': 52.86, 'max_query_length': 779, 'unique_queries': 9076, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 8553, 'hf_subset_descriptive_stats': {'ar': {'number_of_characters': 46144, 'num_samples': 1682, 'num_queries': 890, 'num_documents': 792, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 792, 'min_query_length': 4, 'average_query_length': 51.85, 'max_query_length': 533, 'unique_queries': 871, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 792}, 'bg': {'number_of_characters': 40682, 'num_samples': 1666, 'num_queries': 860, 'num_documents': 806, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 806, 'min_query_length': 4, 'average_query_length': 47.3, 'max_query_length': 771, 'unique_queries': 830, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 806}, 'da': {'number_of_characters': 48235, 'num_samples': 1705, 'num_queries': 891, 'num_documents': 814, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 814, 'min_query_length': 4, 'average_query_length': 54.14, 'max_query_length': 537, 'unique_queries': 889, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 814}, 'el': {'number_of_characters': 30842, 'num_samples': 1111, 'num_queries': 570, 'num_documents': 541, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 541, 'min_query_length': 1, 'average_query_length': 54.11, 'max_query_length': 404, 'unique_queries': 565, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 541}, 'et': {'number_of_characters': 33995, 'num_samples': 1654, 'num_queries': 874, 'num_documents': 780, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 780, 'min_query_length': 3, 'average_query_length': 38.9, 'max_query_length': 588, 'unique_queries': 750, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 780}, 'id': {'number_of_characters': 45428, 'num_samples': 1755, 'num_queries': 901, 'num_documents': 854, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 854, 'min_query_length': 1, 'average_query_length': 50.42, 'max_query_length': 628, 'unique_queries': 863, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 854}, 'ko': {'number_of_characters': 18304, 'num_samples': 1820, 'num_queries': 931, 'num_documents': 889, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 889, 'min_query_length': 2, 'average_query_length': 19.66, 'max_query_length': 168, 'unique_queries': 905, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 889}, 'ja': {'number_of_characters': 21706, 'num_samples': 1842, 'num_queries': 1000, 'num_documents': 842, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 842, 'min_query_length': 2, 'average_query_length': 21.71, 'max_query_length': 368, 'unique_queries': 875, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 842}, 'tr': {'number_of_characters': 33434, 'num_samples': 1402, 'num_queries': 721, 'num_documents': 681, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 681, 'min_query_length': 4, 'average_query_length': 46.37, 'max_query_length': 408, 'unique_queries': 712, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 681}, 'vi': {'number_of_characters': 53181, 'num_samples': 1815, 'num_queries': 946, 'num_documents': 869, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 869, 'min_query_length': 3, 'average_query_length': 56.22, 'max_query_length': 476, 'unique_queries': 921, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 869}, 'en': {'number_of_characters': 57978, 'num_samples': 1685, 'num_queries': 1000, 'num_documents': 685, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 685, 'min_query_length': 4, 'average_query_length': 57.98, 'max_query_length': 690, 'unique_queries': 895, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 685}}}}
WRIMEClassification ['jpn'] Classification s2s [Social, Written] None None
Waimai (Xiao et al., 2023) ['cmn'] Classification s2s None None
WebFAQBitextMiningQAs (Michael Dinzinger, 2025) ['ara', 'aze', 'ben', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fas', 'fin', 'fra', 'heb', 'hin', 'hrv', 'hun', 'ind', 'isl', 'ita', 'jpn', 'kat', 'kaz', 'kor', 'lav', 'lit', 'mar', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'srp', 'swe', 'tgl', 'tha', 'tur', 'ukr', 'urd', 'vie', 'zho'] BitextMining p2p [Web, Written] {'default': 682057} {'default': {'num_samples': 682057, 'number_of_characters': 526563222, 'unique_pairs': 682057, 'min_sentence1_length': 43, 'average_sentence1_length': 386.04, 'max_sentence1_length': 18410, 'unique_sentence1': 379274, 'min_sentence2_length': 41, 'average_sentence2_length': 385.98, 'max_sentence2_length': 21081, 'unique_sentence2': 398878, 'hf_subset_descriptive_stats': {'ara-fas': {'num_samples': 609, 'number_of_characters': 411293, 'unique_pairs': 609, 'min_sentence1_length': 54, 'average_sentence1_length': 321.44, 'max_sentence1_length': 2446, 'unique_sentence1': 609, 'min_sentence2_length': 63, 'average_sentence2_length': 353.91, 'max_sentence2_length': 2754, 'unique_sentence2': 609}, 'ara-heb': {'num_samples': 978, 'number_of_characters': 628664, 'unique_pairs': 978, 'min_sentence1_length': 68, 'average_sentence1_length': 336.2, 'max_sentence1_length': 4204, 'unique_sentence1': 978, 'min_sentence2_length': 62, 'average_sentence2_length': 306.61, 'max_sentence2_length': 3077, 'unique_sentence2': 978}, 'jpn-kor': {'num_samples': 4820, 'number_of_characters': 1914641, 'unique_pairs': 4820, 'min_sentence1_length': 43, 'average_sentence1_length': 194.26, 'max_sentence1_length': 1741, 'unique_sentence1': 4820, 'min_sentence2_length': 43, 'average_sentence2_length': 202.97, 'max_sentence2_length': 1830, 'unique_sentence2': 4820}, 'jpn-vie': {'num_samples': 1356, 'number_of_characters': 823292, 'unique_pairs': 1356, 'min_sentence1_length': 50, 'average_sentence1_length': 202.49, 'max_sentence1_length': 1660, 'unique_sentence1': 1356, 'min_sentence2_length': 71, 'average_sentence2_length': 404.66, 'max_sentence2_length': 3938, 'unique_sentence2': 1356}, 'jpn-zho': {'num_samples': 1728, 'number_of_characters': 587091, 'unique_pairs': 1728, 'min_sentence1_length': 49, 'average_sentence1_length': 198.45, 'max_sentence1_length': 6803, 'unique_sentence1': 1728, 'min_sentence2_length': 43, 'average_sentence2_length': 141.3, 'max_sentence2_length': 4500, 'unique_sentence2': 1728}, 'kor-vie': {'num_samples': 1386, 'number_of_characters': 906969, 'unique_pairs': 1386, 'min_sentence1_length': 50, 'average_sentence1_length': 224.99, 'max_sentence1_length': 3677, 'unique_sentence1': 1386, 'min_sentence2_length': 71, 'average_sentence2_length': 429.39, 'max_sentence2_length': 6989, 'unique_sentence2': 1386}, 'kor-zho': {'num_samples': 1087, 'number_of_characters': 354672, 'unique_pairs': 1087, 'min_sentence1_length': 64, 'average_sentence1_length': 193.25, 'max_sentence1_length': 1064, 'unique_sentence1': 1087, 'min_sentence2_length': 45, 'average_sentence2_length': 133.03, 'max_sentence2_length': 729, 'unique_sentence2': 1087}, 'vie-zho': {'num_samples': 646, 'number_of_characters': 341786, 'unique_pairs': 646, 'min_sentence1_length': 89, 'average_sentence1_length': 388.88, 'max_sentence1_length': 2114, 'unique_sentence1': 646, 'min_sentence2_length': 45, 'average_sentence2_length': 140.2, 'max_sentence2_length': 1267, 'unique_sentence2': 646}, 'ind-msa': {'num_samples': 455, 'number_of_characters': 197047, 'unique_pairs': 455, 'min_sentence1_length': 78, 'average_sentence1_length': 217.38, 'max_sentence1_length': 870, 'unique_sentence1': 455, 'min_sentence2_length': 72, 'average_sentence2_length': 215.69, 'max_sentence2_length': 784, 'unique_sentence2': 455}, 'ind-tgl': {'num_samples': 378, 'number_of_characters': 265203, 'unique_pairs': 378, 'min_sentence1_length': 74, 'average_sentence1_length': 329.09, 'max_sentence1_length': 1418, 'unique_sentence1': 378, 'min_sentence2_length': 75, 'average_sentence2_length': 372.51, 'max_sentence2_length': 1600, 'unique_sentence2': 378}, 'ind-tha': {'num_samples': 1258, 'number_of_characters': 794128, 'unique_pairs': 1258, 'min_sentence1_length': 72, 'average_sentence1_length': 347.4, 'max_sentence1_length': 3226, 'unique_sentence1': 1258, 'min_sentence2_length': 63, 'average_sentence2_length': 283.86, 'max_sentence2_length': 2816, 'unique_sentence2': 1258}, 'bul-ces': {'num_samples': 1485, 'number_of_characters': 922134, 'unique_pairs': 1485, 'min_sentence1_length': 71, 'average_sentence1_length': 325.89, 'max_sentence1_length': 1945, 'unique_sentence1': 1485, 'min_sentence2_length': 56, 'average_sentence2_length': 295.08, 'max_sentence2_length': 1921, 'unique_sentence2': 1485}, 'bul-lav': {'num_samples': 710, 'number_of_characters': 492895, 'unique_pairs': 710, 'min_sentence1_length': 74, 'average_sentence1_length': 358.15, 'max_sentence1_length': 2765, 'unique_sentence1': 710, 'min_sentence2_length': 61, 'average_sentence2_length': 336.07, 'max_sentence2_length': 2523, 'unique_sentence2': 710}, 'bul-lit': {'num_samples': 803, 'number_of_characters': 540245, 'unique_pairs': 803, 'min_sentence1_length': 71, 'average_sentence1_length': 346.31, 'max_sentence1_length': 1945, 'unique_sentence1': 803, 'min_sentence2_length': 67, 'average_sentence2_length': 326.47, 'max_sentence2_length': 1925, 'unique_sentence2': 803}, 'bul-pol': {'num_samples': 1635, 'number_of_characters': 1126043, 'unique_pairs': 1635, 'min_sentence1_length': 69, 'average_sentence1_length': 347.74, 'max_sentence1_length': 2431, 'unique_sentence1': 1635, 'min_sentence2_length': 68, 'average_sentence2_length': 340.97, 'max_sentence2_length': 2277, 'unique_sentence2': 1635}, 'bul-rus': {'num_samples': 1476, 'number_of_characters': 995879, 'unique_pairs': 1476, 'min_sentence1_length': 71, 'average_sentence1_length': 337.76, 'max_sentence1_length': 4620, 'unique_sentence1': 1476, 'min_sentence2_length': 63, 'average_sentence2_length': 336.95, 'max_sentence2_length': 4654, 'unique_sentence2': 1476}, 'bul-slk': {'num_samples': 1154, 'number_of_characters': 777946, 'unique_pairs': 1154, 'min_sentence1_length': 73, 'average_sentence1_length': 349.94, 'max_sentence1_length': 1945, 'unique_sentence1': 1154, 'min_sentence2_length': 68, 'average_sentence2_length': 324.19, 'max_sentence2_length': 2073, 'unique_sentence2': 1154}, 'bul-slv': {'num_samples': 1034, 'number_of_characters': 673719, 'unique_pairs': 1034, 'min_sentence1_length': 86, 'average_sentence1_length': 339.07, 'max_sentence1_length': 1945, 'unique_sentence1': 1034, 'min_sentence2_length': 79, 'average_sentence2_length': 312.5, 'max_sentence2_length': 1872, 'unique_sentence2': 1034}, 'bul-srp': {'num_samples': 296, 'number_of_characters': 222838, 'unique_pairs': 296, 'min_sentence1_length': 92, 'average_sentence1_length': 390.41, 'max_sentence1_length': 1945, 'unique_sentence1': 296, 'min_sentence2_length': 87, 'average_sentence2_length': 362.43, 'max_sentence2_length': 1845, 'unique_sentence2': 296}, 'bul-ukr': {'num_samples': 1074, 'number_of_characters': 708525, 'unique_pairs': 1074, 'min_sentence1_length': 64, 'average_sentence1_length': 335.27, 'max_sentence1_length': 2057, 'unique_sentence1': 1074, 'min_sentence2_length': 59, 'average_sentence2_length': 324.43, 'max_sentence2_length': 2042, 'unique_sentence2': 1074}, 'ces-lav': {'num_samples': 875, 'number_of_characters': 569552, 'unique_pairs': 875, 'min_sentence1_length': 74, 'average_sentence1_length': 318.83, 'max_sentence1_length': 1921, 'unique_sentence1': 875, 'min_sentence2_length': 76, 'average_sentence2_length': 332.08, 'max_sentence2_length': 1948, 'unique_sentence2': 875}, 'ces-lit': {'num_samples': 1002, 'number_of_characters': 674361, 'unique_pairs': 1002, 'min_sentence1_length': 73, 'average_sentence1_length': 328.37, 'max_sentence1_length': 2956, 'unique_sentence1': 1002, 'min_sentence2_length': 84, 'average_sentence2_length': 344.64, 'max_sentence2_length': 2995, 'unique_sentence2': 1002}, 'ces-pol': {'num_samples': 3367, 'number_of_characters': 2230397, 'unique_pairs': 3367, 'min_sentence1_length': 56, 'average_sentence1_length': 317.49, 'max_sentence1_length': 2453, 'unique_sentence1': 3367, 'min_sentence2_length': 64, 'average_sentence2_length': 344.94, 'max_sentence2_length': 2621, 'unique_sentence2': 3367}, 'ces-rus': {'num_samples': 2144, 'number_of_characters': 1438311, 'unique_pairs': 2144, 'min_sentence1_length': 56, 'average_sentence1_length': 319.4, 'max_sentence1_length': 2349, 'unique_sentence1': 2144, 'min_sentence2_length': 71, 'average_sentence2_length': 351.45, 'max_sentence2_length': 2509, 'unique_sentence2': 2144}, 'ces-slk': {'num_samples': 2551, 'number_of_characters': 1733126, 'unique_pairs': 2551, 'min_sentence1_length': 65, 'average_sentence1_length': 334.9, 'max_sentence1_length': 7967, 'unique_sentence1': 2551, 'min_sentence2_length': 67, 'average_sentence2_length': 344.49, 'max_sentence2_length': 10365, 'unique_sentence2': 2551}, 'ces-slv': {'num_samples': 1370, 'number_of_characters': 848116, 'unique_pairs': 1370, 'min_sentence1_length': 54, 'average_sentence1_length': 304.5, 'max_sentence1_length': 2956, 'unique_sentence1': 1370, 'min_sentence2_length': 59, 'average_sentence2_length': 314.56, 'max_sentence2_length': 3006, 'unique_sentence2': 1370}, 'ces-srp': {'num_samples': 362, 'number_of_characters': 238713, 'unique_pairs': 362, 'min_sentence1_length': 77, 'average_sentence1_length': 322.69, 'max_sentence1_length': 1921, 'unique_sentence1': 362, 'min_sentence2_length': 80, 'average_sentence2_length': 336.74, 'max_sentence2_length': 1861, 'unique_sentence2': 362}, 'ces-ukr': {'num_samples': 1285, 'number_of_characters': 789567, 'unique_pairs': 1285, 'min_sentence1_length': 56, 'average_sentence1_length': 295.05, 'max_sentence1_length': 1921, 'unique_sentence1': 1285, 'min_sentence2_length': 69, 'average_sentence2_length': 319.4, 'max_sentence2_length': 2042, 'unique_sentence2': 1285}, 'hrv-slk': {'num_samples': 313, 'number_of_characters': 184033, 'unique_pairs': 313, 'min_sentence1_length': 112, 'average_sentence1_length': 295.81, 'max_sentence1_length': 1393, 'unique_sentence1': 313, 'min_sentence2_length': 104, 'average_sentence2_length': 292.16, 'max_sentence2_length': 1411, 'unique_sentence2': 313}, 'kat-rus': {'num_samples': 262, 'number_of_characters': 190050, 'unique_pairs': 262, 'min_sentence1_length': 68, 'average_sentence1_length': 362.44, 'max_sentence1_length': 2879, 'unique_sentence1': 262, 'min_sentence2_length': 74, 'average_sentence2_length': 362.95, 'max_sentence2_length': 3069, 'unique_sentence2': 262}, 'lav-lit': {'num_samples': 1061, 'number_of_characters': 794243, 'unique_pairs': 1061, 'min_sentence1_length': 61, 'average_sentence1_length': 372.32, 'max_sentence1_length': 2410, 'unique_sentence1': 1061, 'min_sentence2_length': 67, 'average_sentence2_length': 376.26, 'max_sentence2_length': 2463, 'unique_sentence2': 1061}, 'lav-pol': {'num_samples': 951, 'number_of_characters': 701354, 'unique_pairs': 951, 'min_sentence1_length': 63, 'average_sentence1_length': 359.69, 'max_sentence1_length': 2044, 'unique_sentence1': 951, 'min_sentence2_length': 72, 'average_sentence2_length': 377.8, 'max_sentence2_length': 2234, 'unique_sentence2': 951}, 'lav-rus': {'num_samples': 1412, 'number_of_characters': 1039535, 'unique_pairs': 1412, 'min_sentence1_length': 61, 'average_sentence1_length': 358.04, 'max_sentence1_length': 2206, 'unique_sentence1': 1412, 'min_sentence2_length': 63, 'average_sentence2_length': 378.18, 'max_sentence2_length': 2383, 'unique_sentence2': 1412}, 'lav-slk': {'num_samples': 789, 'number_of_characters': 535091, 'unique_pairs': 789, 'min_sentence1_length': 75, 'average_sentence1_length': 342.33, 'max_sentence1_length': 1948, 'unique_sentence1': 789, 'min_sentence2_length': 68, 'average_sentence2_length': 335.86, 'max_sentence2_length': 1910, 'unique_sentence2': 789}, 'lav-slv': {'num_samples': 518, 'number_of_characters': 340127, 'unique_pairs': 518, 'min_sentence1_length': 76, 'average_sentence1_length': 329.34, 'max_sentence1_length': 1948, 'unique_sentence1': 518, 'min_sentence2_length': 71, 'average_sentence2_length': 327.28, 'max_sentence2_length': 1872, 'unique_sentence2': 518}, 'lav-ukr': {'num_samples': 579, 'number_of_characters': 428022, 'unique_pairs': 579, 'min_sentence1_length': 61, 'average_sentence1_length': 365.16, 'max_sentence1_length': 2410, 'unique_sentence1': 579, 'min_sentence2_length': 59, 'average_sentence2_length': 374.08, 'max_sentence2_length': 2412, 'unique_sentence2': 579}, 'lit-pol': {'num_samples': 1026, 'number_of_characters': 767128, 'unique_pairs': 1026, 'min_sentence1_length': 64, 'average_sentence1_length': 366.06, 'max_sentence1_length': 1990, 'unique_sentence1': 1026, 'min_sentence2_length': 79, 'average_sentence2_length': 381.63, 'max_sentence2_length': 2234, 'unique_sentence2': 1026}, 'lit-rus': {'num_samples': 961, 'number_of_characters': 744509, 'unique_pairs': 961, 'min_sentence1_length': 67, 'average_sentence1_length': 379.47, 'max_sentence1_length': 3141, 'unique_sentence1': 961, 'min_sentence2_length': 63, 'average_sentence2_length': 395.26, 'max_sentence2_length': 2201, 'unique_sentence2': 961}, 'lit-slk': {'num_samples': 859, 'number_of_characters': 583451, 'unique_pairs': 859, 'min_sentence1_length': 74, 'average_sentence1_length': 344.75, 'max_sentence1_length': 1925, 'unique_sentence1': 859, 'min_sentence2_length': 68, 'average_sentence2_length': 334.47, 'max_sentence2_length': 1961, 'unique_sentence2': 859}, 'lit-slv': {'num_samples': 607, 'number_of_characters': 438866, 'unique_pairs': 607, 'min_sentence1_length': 70, 'average_sentence1_length': 366.02, 'max_sentence1_length': 2995, 'unique_sentence1': 607, 'min_sentence2_length': 75, 'average_sentence2_length': 356.99, 'max_sentence2_length': 3006, 'unique_sentence2': 607}, 'lit-ukr': {'num_samples': 639, 'number_of_characters': 463616, 'unique_pairs': 639, 'min_sentence1_length': 67, 'average_sentence1_length': 361.16, 'max_sentence1_length': 2463, 'unique_sentence1': 639, 'min_sentence2_length': 59, 'average_sentence2_length': 364.37, 'max_sentence2_length': 2412, 'unique_sentence2': 639}, 'pol-rus': {'num_samples': 5014, 'number_of_characters': 3850186, 'unique_pairs': 5014, 'min_sentence1_length': 60, 'average_sentence1_length': 380.93, 'max_sentence1_length': 5103, 'unique_sentence1': 5014, 'min_sentence2_length': 59, 'average_sentence2_length': 386.95, 'max_sentence2_length': 4888, 'unique_sentence2': 5014}, 'pol-slk': {'num_samples': 1918, 'number_of_characters': 1321855, 'unique_pairs': 1918, 'min_sentence1_length': 71, 'average_sentence1_length': 354.28, 'max_sentence1_length': 5103, 'unique_sentence1': 1918, 'min_sentence2_length': 67, 'average_sentence2_length': 334.9, 'max_sentence2_length': 4641, 'unique_sentence2': 1918}, 'pol-slv': {'num_samples': 1382, 'number_of_characters': 859222, 'unique_pairs': 1382, 'min_sentence1_length': 76, 'average_sentence1_length': 317.49, 'max_sentence1_length': 2101, 'unique_sentence1': 1382, 'min_sentence2_length': 75, 'average_sentence2_length': 304.23, 'max_sentence2_length': 2015, 'unique_sentence2': 1382}, 'pol-srp': {'num_samples': 492, 'number_of_characters': 350413, 'unique_pairs': 492, 'min_sentence1_length': 82, 'average_sentence1_length': 357.26, 'max_sentence1_length': 1902, 'unique_sentence1': 492, 'min_sentence2_length': 78, 'average_sentence2_length': 354.96, 'max_sentence2_length': 1845, 'unique_sentence2': 492}, 'pol-ukr': {'num_samples': 2370, 'number_of_characters': 1753652, 'unique_pairs': 2370, 'min_sentence1_length': 59, 'average_sentence1_length': 373.9, 'max_sentence1_length': 3106, 'unique_sentence1': 2370, 'min_sentence2_length': 61, 'average_sentence2_length': 366.04, 'max_sentence2_length': 2827, 'unique_sentence2': 2370}, 'rus-slk': {'num_samples': 1263, 'number_of_characters': 905526, 'unique_pairs': 1263, 'min_sentence1_length': 69, 'average_sentence1_length': 371.39, 'max_sentence1_length': 4888, 'unique_sentence1': 1263, 'min_sentence2_length': 67, 'average_sentence2_length': 345.58, 'max_sentence2_length': 4641, 'unique_sentence2': 1263}, 'rus-slv': {'num_samples': 1096, 'number_of_characters': 719013, 'unique_pairs': 1096, 'min_sentence1_length': 84, 'average_sentence1_length': 341.51, 'max_sentence1_length': 2164, 'unique_sentence1': 1096, 'min_sentence2_length': 71, 'average_sentence2_length': 314.52, 'max_sentence2_length': 2015, 'unique_sentence2': 1096}, 'rus-srp': {'num_samples': 455, 'number_of_characters': 341619, 'unique_pairs': 455, 'min_sentence1_length': 92, 'average_sentence1_length': 386.63, 'max_sentence1_length': 1921, 'unique_sentence1': 455, 'min_sentence2_length': 90, 'average_sentence2_length': 364.18, 'max_sentence2_length': 1845, 'unique_sentence2': 455}, 'rus-ukr': {'num_samples': 15251, 'number_of_characters': 10782282, 'unique_pairs': 15251, 'min_sentence1_length': 55, 'average_sentence1_length': 358.27, 'max_sentence1_length': 3905, 'unique_sentence1': 15251, 'min_sentence2_length': 49, 'average_sentence2_length': 348.72, 'max_sentence2_length': 3801, 'unique_sentence2': 15251}, 'slk-slv': {'num_samples': 1259, 'number_of_characters': 852109, 'unique_pairs': 1259, 'min_sentence1_length': 68, 'average_sentence1_length': 338.08, 'max_sentence1_length': 1961, 'unique_sentence1': 1259, 'min_sentence2_length': 71, 'average_sentence2_length': 338.74, 'max_sentence2_length': 1872, 'unique_sentence2': 1259}, 'slk-srp': {'num_samples': 561, 'number_of_characters': 493396, 'unique_pairs': 561, 'min_sentence1_length': 82, 'average_sentence1_length': 431.13, 'max_sentence1_length': 1910, 'unique_sentence1': 561, 'min_sentence2_length': 80, 'average_sentence2_length': 448.36, 'max_sentence2_length': 1845, 'unique_sentence2': 561}, 'slk-ukr': {'num_samples': 944, 'number_of_characters': 608143, 'unique_pairs': 944, 'min_sentence1_length': 68, 'average_sentence1_length': 314.41, 'max_sentence1_length': 1910, 'unique_sentence1': 944, 'min_sentence2_length': 69, 'average_sentence2_length': 329.81, 'max_sentence2_length': 1923, 'unique_sentence2': 944}, 'slv-srp': {'num_samples': 499, 'number_of_characters': 378293, 'unique_pairs': 499, 'min_sentence1_length': 80, 'average_sentence1_length': 374.47, 'max_sentence1_length': 2476, 'unique_sentence1': 499, 'min_sentence2_length': 80, 'average_sentence2_length': 383.63, 'max_sentence2_length': 2387, 'unique_sentence2': 499}, 'slv-ukr': {'num_samples': 733, 'number_of_characters': 431361, 'unique_pairs': 733, 'min_sentence1_length': 71, 'average_sentence1_length': 286.64, 'max_sentence1_length': 1872, 'unique_sentence1': 733, 'min_sentence2_length': 69, 'average_sentence2_length': 301.85, 'max_sentence2_length': 1923, 'unique_sentence2': 733}, 'cat-deu': {'num_samples': 302, 'number_of_characters': 279459, 'unique_pairs': 302, 'min_sentence1_length': 66, 'average_sentence1_length': 451.95, 'max_sentence1_length': 2056, 'unique_sentence1': 302, 'min_sentence2_length': 77, 'average_sentence2_length': 473.41, 'max_sentence2_length': 2082, 'unique_sentence2': 302}, 'cat-fra': {'num_samples': 598, 'number_of_characters': 476709, 'unique_pairs': 598, 'min_sentence1_length': 62, 'average_sentence1_length': 381.68, 'max_sentence1_length': 2056, 'unique_sentence1': 598, 'min_sentence2_length': 74, 'average_sentence2_length': 415.5, 'max_sentence2_length': 2277, 'unique_sentence2': 598}, 'cat-ita': {'num_samples': 418, 'number_of_characters': 327132, 'unique_pairs': 418, 'min_sentence1_length': 60, 'average_sentence1_length': 388.8, 'max_sentence1_length': 2056, 'unique_sentence1': 418, 'min_sentence2_length': 55, 'average_sentence2_length': 393.81, 'max_sentence2_length': 2186, 'unique_sentence2': 418}, 'cat-por': {'num_samples': 370, 'number_of_characters': 248938, 'unique_pairs': 370, 'min_sentence1_length': 58, 'average_sentence1_length': 338.32, 'max_sentence1_length': 2056, 'unique_sentence1': 370, 'min_sentence2_length': 60, 'average_sentence2_length': 334.48, 'max_sentence2_length': 2088, 'unique_sentence2': 370}, 'cat-spa': {'num_samples': 2648, 'number_of_characters': 2308040, 'unique_pairs': 2648, 'min_sentence1_length': 62, 'average_sentence1_length': 430.96, 'max_sentence1_length': 8113, 'unique_sentence1': 2648, 'min_sentence2_length': 65, 'average_sentence2_length': 440.66, 'max_sentence2_length': 8345, 'unique_sentence2': 2648}, 'dan-deu': {'num_samples': 4337, 'number_of_characters': 3443148, 'unique_pairs': 4337, 'min_sentence1_length': 60, 'average_sentence1_length': 372.7, 'max_sentence1_length': 4236, 'unique_sentence1': 4337, 'min_sentence2_length': 69, 'average_sentence2_length': 421.21, 'max_sentence2_length': 4093, 'unique_sentence2': 4337}, 'dan-fra': {'num_samples': 3802, 'number_of_characters': 3021023, 'unique_pairs': 3802, 'min_sentence1_length': 62, 'average_sentence1_length': 365.2, 'max_sentence1_length': 4236, 'unique_sentence1': 3802, 'min_sentence2_length': 70, 'average_sentence2_length': 429.39, 'max_sentence2_length': 4717, 'unique_sentence2': 3802}, 'dan-isl': {'num_samples': 327, 'number_of_characters': 230870, 'unique_pairs': 327, 'min_sentence1_length': 81, 'average_sentence1_length': 357.51, 'max_sentence1_length': 1856, 'unique_sentence1': 327, 'min_sentence2_length': 82, 'average_sentence2_length': 348.51, 'max_sentence2_length': 1897, 'unique_sentence2': 327}, 'dan-ita': {'num_samples': 3818, 'number_of_characters': 2976506, 'unique_pairs': 3818, 'min_sentence1_length': 62, 'average_sentence1_length': 371.54, 'max_sentence1_length': 4236, 'unique_sentence1': 3818, 'min_sentence2_length': 64, 'average_sentence2_length': 408.06, 'max_sentence2_length': 4574, 'unique_sentence2': 3818}, 'dan-nld': {'num_samples': 4099, 'number_of_characters': 3047816, 'unique_pairs': 4099, 'min_sentence1_length': 68, 'average_sentence1_length': 360.12, 'max_sentence1_length': 4236, 'unique_sentence1': 4099, 'min_sentence2_length': 63, 'average_sentence2_length': 383.43, 'max_sentence2_length': 4431, 'unique_sentence2': 4099}, 'dan-nor': {'num_samples': 2603, 'number_of_characters': 1873194, 'unique_pairs': 2603, 'min_sentence1_length': 62, 'average_sentence1_length': 365.46, 'max_sentence1_length': 3505, 'unique_sentence1': 2603, 'min_sentence2_length': 59, 'average_sentence2_length': 354.17, 'max_sentence2_length': 3400, 'unique_sentence2': 2603}, 'dan-por': {'num_samples': 3206, 'number_of_characters': 2344257, 'unique_pairs': 3206, 'min_sentence1_length': 60, 'average_sentence1_length': 353.3, 'max_sentence1_length': 3843, 'unique_sentence1': 3206, 'min_sentence2_length': 65, 'average_sentence2_length': 377.91, 'max_sentence2_length': 3799, 'unique_sentence2': 3206}, 'dan-ron': {'num_samples': 2052, 'number_of_characters': 1446475, 'unique_pairs': 2052, 'min_sentence1_length': 67, 'average_sentence1_length': 336.33, 'max_sentence1_length': 4236, 'unique_sentence1': 2052, 'min_sentence2_length': 63, 'average_sentence2_length': 368.58, 'max_sentence2_length': 4285, 'unique_sentence2': 2052}, 'dan-spa': {'num_samples': 3571, 'number_of_characters': 2720999, 'unique_pairs': 3571, 'min_sentence1_length': 60, 'average_sentence1_length': 360.46, 'max_sentence1_length': 4236, 'unique_sentence1': 3571, 'min_sentence2_length': 65, 'average_sentence2_length': 401.51, 'max_sentence2_length': 4498, 'unique_sentence2': 3571}, 'dan-swe': {'num_samples': 4268, 'number_of_characters': 3115686, 'unique_pairs': 4268, 'min_sentence1_length': 63, 'average_sentence1_length': 368.21, 'max_sentence1_length': 15317, 'unique_sentence1': 4268, 'min_sentence2_length': 64, 'average_sentence2_length': 361.8, 'max_sentence2_length': 15427, 'unique_sentence2': 4268}, 'deu-fra': {'num_samples': 27727, 'number_of_characters': 24341503, 'unique_pairs': 27727, 'min_sentence1_length': 52, 'average_sentence1_length': 429.85, 'max_sentence1_length': 10595, 'unique_sentence1': 27727, 'min_sentence2_length': 60, 'average_sentence2_length': 448.05, 'max_sentence2_length': 11165, 'unique_sentence2': 27727}, 'deu-isl': {'num_samples': 294, 'number_of_characters': 221411, 'unique_pairs': 294, 'min_sentence1_length': 84, 'average_sentence1_length': 403.22, 'max_sentence1_length': 2082, 'unique_sentence1': 294, 'min_sentence2_length': 79, 'average_sentence2_length': 349.88, 'max_sentence2_length': 1897, 'unique_sentence2': 294}, 'deu-ita': {'num_samples': 18787, 'number_of_characters': 15815694, 'unique_pairs': 18787, 'min_sentence1_length': 52, 'average_sentence1_length': 424.82, 'max_sentence1_length': 7136, 'unique_sentence1': 18787, 'min_sentence2_length': 56, 'average_sentence2_length': 417.02, 'max_sentence2_length': 6634, 'unique_sentence2': 18787}, 'deu-nld': {'num_samples': 14211, 'number_of_characters': 11610783, 'unique_pairs': 14211, 'min_sentence1_length': 52, 'average_sentence1_length': 418.9, 'max_sentence1_length': 4919, 'unique_sentence1': 14211, 'min_sentence2_length': 62, 'average_sentence2_length': 398.13, 'max_sentence2_length': 4779, 'unique_sentence2': 14211}, 'deu-nor': {'num_samples': 2783, 'number_of_characters': 2150067, 'unique_pairs': 2783, 'min_sentence1_length': 63, 'average_sentence1_length': 413.82, 'max_sentence1_length': 18410, 'unique_sentence1': 2783, 'min_sentence2_length': 63, 'average_sentence2_length': 358.75, 'max_sentence2_length': 16149, 'unique_sentence2': 2783}, 'deu-por': {'num_samples': 11319, 'number_of_characters': 9085897, 'unique_pairs': 11319, 'min_sentence1_length': 63, 'average_sentence1_length': 412.04, 'max_sentence1_length': 7136, 'unique_sentence1': 11319, 'min_sentence2_length': 59, 'average_sentence2_length': 390.67, 'max_sentence2_length': 6536, 'unique_sentence2': 11319}, 'deu-ron': {'num_samples': 3598, 'number_of_characters': 2755112, 'unique_pairs': 3598, 'min_sentence1_length': 61, 'average_sentence1_length': 387.56, 'max_sentence1_length': 4093, 'unique_sentence1': 3598, 'min_sentence2_length': 55, 'average_sentence2_length': 378.17, 'max_sentence2_length': 4285, 'unique_sentence2': 3598}, 'deu-spa': {'num_samples': 19739, 'number_of_characters': 16855942, 'unique_pairs': 19739, 'min_sentence1_length': 60, 'average_sentence1_length': 430.84, 'max_sentence1_length': 7136, 'unique_sentence1': 19739, 'min_sentence2_length': 55, 'average_sentence2_length': 423.1, 'max_sentence2_length': 6963, 'unique_sentence2': 19739}, 'deu-swe': {'num_samples': 5772, 'number_of_characters': 4469906, 'unique_pairs': 5772, 'min_sentence1_length': 59, 'average_sentence1_length': 412.41, 'max_sentence1_length': 4093, 'unique_sentence1': 5772, 'min_sentence2_length': 57, 'average_sentence2_length': 362.0, 'max_sentence2_length': 4038, 'unique_sentence2': 5772}, 'fra-isl': {'num_samples': 347, 'number_of_characters': 256923, 'unique_pairs': 347, 'min_sentence1_length': 76, 'average_sentence1_length': 400.64, 'max_sentence1_length': 2277, 'unique_sentence1': 347, 'min_sentence2_length': 75, 'average_sentence2_length': 339.77, 'max_sentence2_length': 1897, 'unique_sentence2': 347}, 'fra-ita': {'num_samples': 20002, 'number_of_characters': 17269559, 'unique_pairs': 20002, 'min_sentence1_length': 62, 'average_sentence1_length': 444.94, 'max_sentence1_length': 7253, 'unique_sentence1': 20002, 'min_sentence2_length': 49, 'average_sentence2_length': 418.46, 'max_sentence2_length': 6634, 'unique_sentence2': 20002}, 'fra-nld': {'num_samples': 14684, 'number_of_characters': 12784405, 'unique_pairs': 14684, 'min_sentence1_length': 60, 'average_sentence1_length': 455.31, 'max_sentence1_length': 9107, 'unique_sentence1': 14684, 'min_sentence2_length': 51, 'average_sentence2_length': 415.33, 'max_sentence2_length': 8534, 'unique_sentence2': 14684}, 'fra-nor': {'num_samples': 2558, 'number_of_characters': 1963253, 'unique_pairs': 2558, 'min_sentence1_length': 86, 'average_sentence1_length': 421.16, 'max_sentence1_length': 4086, 'unique_sentence1': 2558, 'min_sentence2_length': 64, 'average_sentence2_length': 346.33, 'max_sentence2_length': 3400, 'unique_sentence2': 2558}, 'fra-por': {'num_samples': 13265, 'number_of_characters': 10942625, 'unique_pairs': 13265, 'min_sentence1_length': 61, 'average_sentence1_length': 431.68, 'max_sentence1_length': 7253, 'unique_sentence1': 13265, 'min_sentence2_length': 55, 'average_sentence2_length': 393.24, 'max_sentence2_length': 6536, 'unique_sentence2': 13265}, 'fra-ron': {'num_samples': 3295, 'number_of_characters': 2448321, 'unique_pairs': 3295, 'min_sentence1_length': 70, 'average_sentence1_length': 385.29, 'max_sentence1_length': 4717, 'unique_sentence1': 3295, 'min_sentence2_length': 63, 'average_sentence2_length': 357.75, 'max_sentence2_length': 4285, 'unique_sentence2': 3295}, 'fra-spa': {'num_samples': 23311, 'number_of_characters': 20477763, 'unique_pairs': 23311, 'min_sentence1_length': 54, 'average_sentence1_length': 451.84, 'max_sentence1_length': 14943, 'unique_sentence1': 23311, 'min_sentence2_length': 50, 'average_sentence2_length': 426.62, 'max_sentence2_length': 13323, 'unique_sentence2': 23311}, 'fra-swe': {'num_samples': 5006, 'number_of_characters': 3880565, 'unique_pairs': 5006, 'min_sentence1_length': 70, 'average_sentence1_length': 421.95, 'max_sentence1_length': 4717, 'unique_sentence1': 5006, 'min_sentence2_length': 59, 'average_sentence2_length': 353.23, 'max_sentence2_length': 4038, 'unique_sentence2': 5006}, 'isl-ita': {'num_samples': 421, 'number_of_characters': 310158, 'unique_pairs': 421, 'min_sentence1_length': 75, 'average_sentence1_length': 350.33, 'max_sentence1_length': 1897, 'unique_sentence1': 421, 'min_sentence2_length': 74, 'average_sentence2_length': 386.39, 'max_sentence2_length': 2186, 'unique_sentence2': 421}, 'isl-nld': {'num_samples': 311, 'number_of_characters': 228026, 'unique_pairs': 311, 'min_sentence1_length': 123, 'average_sentence1_length': 350.56, 'max_sentence1_length': 1705, 'unique_sentence1': 311, 'min_sentence2_length': 125, 'average_sentence2_length': 382.64, 'max_sentence2_length': 2142, 'unique_sentence2': 311}, 'isl-por': {'num_samples': 341, 'number_of_characters': 260057, 'unique_pairs': 341, 'min_sentence1_length': 110, 'average_sentence1_length': 367.62, 'max_sentence1_length': 1897, 'unique_sentence1': 341, 'min_sentence2_length': 128, 'average_sentence2_length': 395.01, 'max_sentence2_length': 2088, 'unique_sentence2': 341}, 'isl-spa': {'num_samples': 366, 'number_of_characters': 262085, 'unique_pairs': 366, 'min_sentence1_length': 75, 'average_sentence1_length': 336.95, 'max_sentence1_length': 1517, 'unique_sentence1': 366, 'min_sentence2_length': 73, 'average_sentence2_length': 379.13, 'max_sentence2_length': 1601, 'unique_sentence2': 366}, 'isl-swe': {'num_samples': 312, 'number_of_characters': 214179, 'unique_pairs': 312, 'min_sentence1_length': 75, 'average_sentence1_length': 342.19, 'max_sentence1_length': 1897, 'unique_sentence1': 312, 'min_sentence2_length': 78, 'average_sentence2_length': 344.28, 'max_sentence2_length': 1841, 'unique_sentence2': 312}, 'ita-nld': {'num_samples': 9160, 'number_of_characters': 7462623, 'unique_pairs': 9160, 'min_sentence1_length': 64, 'average_sentence1_length': 413.37, 'max_sentence1_length': 16311, 'unique_sentence1': 9160, 'min_sentence2_length': 62, 'average_sentence2_length': 401.33, 'max_sentence2_length': 15855, 'unique_sentence2': 9160}, 'ita-nor': {'num_samples': 2516, 'number_of_characters': 1877016, 'unique_pairs': 2516, 'min_sentence1_length': 88, 'average_sentence1_length': 395.47, 'max_sentence1_length': 3906, 'unique_sentence1': 2516, 'min_sentence2_length': 80, 'average_sentence2_length': 350.57, 'max_sentence2_length': 3030, 'unique_sentence2': 2516}, 'ita-por': {'num_samples': 10924, 'number_of_characters': 8751330, 'unique_pairs': 10924, 'min_sentence1_length': 55, 'average_sentence1_length': 406.05, 'max_sentence1_length': 16311, 'unique_sentence1': 10924, 'min_sentence2_length': 55, 'average_sentence2_length': 395.06, 'max_sentence2_length': 16230, 'unique_sentence2': 10924}, 'ita-ron': {'num_samples': 3360, 'number_of_characters': 2417807, 'unique_pairs': 3360, 'min_sentence1_length': 63, 'average_sentence1_length': 360.05, 'max_sentence1_length': 6226, 'unique_sentence1': 3360, 'min_sentence2_length': 65, 'average_sentence2_length': 359.53, 'max_sentence2_length': 6571, 'unique_sentence2': 3360}, 'ita-spa': {'num_samples': 16534, 'number_of_characters': 14167503, 'unique_pairs': 16534, 'min_sentence1_length': 49, 'average_sentence1_length': 426.64, 'max_sentence1_length': 16311, 'unique_sentence1': 16534, 'min_sentence2_length': 50, 'average_sentence2_length': 430.23, 'max_sentence2_length': 16655, 'unique_sentence2': 16534}, 'ita-swe': {'num_samples': 4741, 'number_of_characters': 3557838, 'unique_pairs': 4741, 'min_sentence1_length': 64, 'average_sentence1_length': 394.97, 'max_sentence1_length': 16311, 'unique_sentence1': 4741, 'min_sentence2_length': 57, 'average_sentence2_length': 355.47, 'max_sentence2_length': 15020, 'unique_sentence2': 4741}, 'nld-nor': {'num_samples': 2664, 'number_of_characters': 1941142, 'unique_pairs': 2664, 'min_sentence1_length': 72, 'average_sentence1_length': 380.63, 'max_sentence1_length': 3967, 'unique_sentence1': 2664, 'min_sentence2_length': 68, 'average_sentence2_length': 348.03, 'max_sentence2_length': 3400, 'unique_sentence2': 2664}, 'nld-por': {'num_samples': 7021, 'number_of_characters': 5347190, 'unique_pairs': 7021, 'min_sentence1_length': 51, 'average_sentence1_length': 380.22, 'max_sentence1_length': 15855, 'unique_sentence1': 7021, 'min_sentence2_length': 56, 'average_sentence2_length': 381.38, 'max_sentence2_length': 16230, 'unique_sentence2': 7021}, 'nld-ron': {'num_samples': 2888, 'number_of_characters': 2001437, 'unique_pairs': 2888, 'min_sentence1_length': 60, 'average_sentence1_length': 340.69, 'max_sentence1_length': 4431, 'unique_sentence1': 2888, 'min_sentence2_length': 70, 'average_sentence2_length': 352.33, 'max_sentence2_length': 4285, 'unique_sentence2': 2888}, 'nld-spa': {'num_samples': 9555, 'number_of_characters': 7861048, 'unique_pairs': 9555, 'min_sentence1_length': 62, 'average_sentence1_length': 403.95, 'max_sentence1_length': 15855, 'unique_sentence1': 9555, 'min_sentence2_length': 55, 'average_sentence2_length': 418.76, 'max_sentence2_length': 16655, 'unique_sentence2': 9555}, 'nld-swe': {'num_samples': 5072, 'number_of_characters': 3727392, 'unique_pairs': 5072, 'min_sentence1_length': 65, 'average_sentence1_length': 381.69, 'max_sentence1_length': 15855, 'unique_sentence1': 5072, 'min_sentence2_length': 56, 'average_sentence2_length': 353.2, 'max_sentence2_length': 15020, 'unique_sentence2': 5072}, 'nor-por': {'num_samples': 2096, 'number_of_characters': 1461412, 'unique_pairs': 2096, 'min_sentence1_length': 68, 'average_sentence1_length': 331.47, 'max_sentence1_length': 3400, 'unique_sentence1': 2096, 'min_sentence2_length': 61, 'average_sentence2_length': 365.77, 'max_sentence2_length': 3784, 'unique_sentence2': 2096}, 'nor-ron': {'num_samples': 1412, 'number_of_characters': 972154, 'unique_pairs': 1412, 'min_sentence1_length': 78, 'average_sentence1_length': 324.01, 'max_sentence1_length': 1884, 'unique_sentence1': 1412, 'min_sentence2_length': 75, 'average_sentence2_length': 364.48, 'max_sentence2_length': 2196, 'unique_sentence2': 1412}, 'nor-spa': {'num_samples': 2603, 'number_of_characters': 1933198, 'unique_pairs': 2603, 'min_sentence1_length': 63, 'average_sentence1_length': 347.56, 'max_sentence1_length': 3030, 'unique_sentence1': 2603, 'min_sentence2_length': 74, 'average_sentence2_length': 395.12, 'max_sentence2_length': 3847, 'unique_sentence2': 2603}, 'nor-swe': {'num_samples': 3165, 'number_of_characters': 2305135, 'unique_pairs': 3165, 'min_sentence1_length': 66, 'average_sentence1_length': 360.99, 'max_sentence1_length': 2366, 'unique_sentence1': 3165, 'min_sentence2_length': 70, 'average_sentence2_length': 367.33, 'max_sentence2_length': 2340, 'unique_sentence2': 3165}, 'por-ron': {'num_samples': 3026, 'number_of_characters': 2086079, 'unique_pairs': 3026, 'min_sentence1_length': 71, 'average_sentence1_length': 340.88, 'max_sentence1_length': 4439, 'unique_sentence1': 3026, 'min_sentence2_length': 63, 'average_sentence2_length': 348.51, 'max_sentence2_length': 3274, 'unique_sentence2': 3026}, 'por-spa': {'num_samples': 16084, 'number_of_characters': 12835938, 'unique_pairs': 16084, 'min_sentence1_length': 51, 'average_sentence1_length': 391.86, 'max_sentence1_length': 16230, 'unique_sentence1': 16084, 'min_sentence2_length': 54, 'average_sentence2_length': 406.2, 'max_sentence2_length': 16655, 'unique_sentence2': 16084}, 'por-swe': {'num_samples': 4235, 'number_of_characters': 2994503, 'unique_pairs': 4235, 'min_sentence1_length': 62, 'average_sentence1_length': 367.66, 'max_sentence1_length': 16230, 'unique_sentence1': 4235, 'min_sentence2_length': 57, 'average_sentence2_length': 339.43, 'max_sentence2_length': 15020, 'unique_sentence2': 4235}, 'ron-spa': {'num_samples': 3375, 'number_of_characters': 2415347, 'unique_pairs': 3375, 'min_sentence1_length': 73, 'average_sentence1_length': 355.39, 'max_sentence1_length': 4285, 'unique_sentence1': 3375, 'min_sentence2_length': 70, 'average_sentence2_length': 360.27, 'max_sentence2_length': 4498, 'unique_sentence2': 3375}, 'ron-swe': {'num_samples': 2154, 'number_of_characters': 1454257, 'unique_pairs': 2154, 'min_sentence1_length': 63, 'average_sentence1_length': 354.66, 'max_sentence1_length': 4285, 'unique_sentence1': 2154, 'min_sentence2_length': 63, 'average_sentence2_length': 320.48, 'max_sentence2_length': 4038, 'unique_sentence2': 2154}, 'spa-swe': {'num_samples': 4884, 'number_of_characters': 3751782, 'unique_pairs': 4884, 'min_sentence1_length': 66, 'average_sentence1_length': 406.52, 'max_sentence1_length': 16655, 'unique_sentence1': 4884, 'min_sentence2_length': 62, 'average_sentence2_length': 361.66, 'max_sentence2_length': 15020, 'unique_sentence2': 4884}, 'ben-hin': {'num_samples': 1174, 'number_of_characters': 682915, 'unique_pairs': 1174, 'min_sentence1_length': 64, 'average_sentence1_length': 287.33, 'max_sentence1_length': 1957, 'unique_sentence1': 1174, 'min_sentence2_length': 50, 'average_sentence2_length': 294.37, 'max_sentence2_length': 1980, 'unique_sentence2': 1174}, 'ben-mar': {'num_samples': 566, 'number_of_characters': 305353, 'unique_pairs': 566, 'min_sentence1_length': 50, 'average_sentence1_length': 271.83, 'max_sentence1_length': 1753, 'unique_sentence1': 566, 'min_sentence2_length': 57, 'average_sentence2_length': 267.66, 'max_sentence2_length': 1780, 'unique_sentence2': 566}, 'ben-urd': {'num_samples': 488, 'number_of_characters': 265698, 'unique_pairs': 488, 'min_sentence1_length': 61, 'average_sentence1_length': 269.23, 'max_sentence1_length': 1190, 'unique_sentence1': 488, 'min_sentence2_length': 62, 'average_sentence2_length': 275.23, 'max_sentence2_length': 1179, 'unique_sentence2': 488}, 'hin-mar': {'num_samples': 615, 'number_of_characters': 320880, 'unique_pairs': 615, 'min_sentence1_length': 58, 'average_sentence1_length': 265.89, 'max_sentence1_length': 1769, 'unique_sentence1': 615, 'min_sentence2_length': 58, 'average_sentence2_length': 255.87, 'max_sentence2_length': 1780, 'unique_sentence2': 615}, 'hin-urd': {'num_samples': 545, 'number_of_characters': 293939, 'unique_pairs': 545, 'min_sentence1_length': 63, 'average_sentence1_length': 271.89, 'max_sentence1_length': 1206, 'unique_sentence1': 545, 'min_sentence2_length': 62, 'average_sentence2_length': 267.45, 'max_sentence2_length': 1179, 'unique_sentence2': 545}, 'mar-urd': {'num_samples': 270, 'number_of_characters': 147706, 'unique_pairs': 270, 'min_sentence1_length': 63, 'average_sentence1_length': 270.9, 'max_sentence1_length': 1169, 'unique_sentence1': 270, 'min_sentence2_length': 66, 'average_sentence2_length': 276.16, 'max_sentence2_length': 1172, 'unique_sentence2': 270}, 'aze-kaz': {'num_samples': 412, 'number_of_characters': 230950, 'unique_pairs': 412, 'min_sentence1_length': 73, 'average_sentence1_length': 280.5, 'max_sentence1_length': 1824, 'unique_sentence1': 412, 'min_sentence2_length': 68, 'average_sentence2_length': 280.06, 'max_sentence2_length': 1855, 'unique_sentence2': 412}, 'aze-tur': {'num_samples': 388, 'number_of_characters': 205998, 'unique_pairs': 388, 'min_sentence1_length': 72, 'average_sentence1_length': 266.67, 'max_sentence1_length': 1824, 'unique_sentence1': 388, 'min_sentence2_length': 64, 'average_sentence2_length': 264.26, 'max_sentence2_length': 1838, 'unique_sentence2': 388}, 'kaz-tur': {'num_samples': 340, 'number_of_characters': 181572, 'unique_pairs': 340, 'min_sentence1_length': 68, 'average_sentence1_length': 267.44, 'max_sentence1_length': 1855, 'unique_sentence1': 340, 'min_sentence2_length': 64, 'average_sentence2_length': 266.59, 'max_sentence2_length': 1838, 'unique_sentence2': 340}, 'est-fin': {'num_samples': 790, 'number_of_characters': 551725, 'unique_pairs': 790, 'min_sentence1_length': 63, 'average_sentence1_length': 341.07, 'max_sentence1_length': 1829, 'unique_sentence1': 790, 'min_sentence2_length': 63, 'average_sentence2_length': 357.32, 'max_sentence2_length': 1815, 'unique_sentence2': 790}, 'est-hun': {'num_samples': 674, 'number_of_characters': 465779, 'unique_pairs': 674, 'min_sentence1_length': 72, 'average_sentence1_length': 329.88, 'max_sentence1_length': 2117, 'unique_sentence1': 674, 'min_sentence2_length': 73, 'average_sentence2_length': 361.18, 'max_sentence2_length': 2403, 'unique_sentence2': 674}, 'fin-hun': {'num_samples': 1542, 'number_of_characters': 1062678, 'unique_pairs': 1542, 'min_sentence1_length': 65, 'average_sentence1_length': 336.81, 'max_sentence1_length': 2500, 'unique_sentence1': 1542, 'min_sentence2_length': 66, 'average_sentence2_length': 352.35, 'max_sentence2_length': 2674, 'unique_sentence2': 1542}, 'ara-eng': {'num_samples': 5698, 'number_of_characters': 3662324, 'unique_pairs': 5698, 'min_sentence1_length': 53, 'average_sentence1_length': 302.59, 'max_sentence1_length': 2664, 'unique_sentence1': 5698, 'min_sentence2_length': 60, 'average_sentence2_length': 340.15, 'max_sentence2_length': 2811, 'unique_sentence2': 5698}, 'aze-eng': {'num_samples': 603, 'number_of_characters': 383857, 'unique_pairs': 603, 'min_sentence1_length': 58, 'average_sentence1_length': 323.52, 'max_sentence1_length': 1339, 'unique_sentence1': 603, 'min_sentence2_length': 69, 'average_sentence2_length': 313.05, 'max_sentence2_length': 1352, 'unique_sentence2': 603}, 'ben-eng': {'num_samples': 1367, 'number_of_characters': 866437, 'unique_pairs': 1367, 'min_sentence1_length': 50, 'average_sentence1_length': 318.67, 'max_sentence1_length': 1191, 'unique_sentence1': 1367, 'min_sentence2_length': 65, 'average_sentence2_length': 315.16, 'max_sentence2_length': 1094, 'unique_sentence2': 1367}, 'bul-eng': {'num_samples': 2133, 'number_of_characters': 1458078, 'unique_pairs': 2133, 'min_sentence1_length': 65, 'average_sentence1_length': 354.96, 'max_sentence1_length': 3016, 'unique_sentence1': 2133, 'min_sentence2_length': 65, 'average_sentence2_length': 328.62, 'max_sentence2_length': 2770, 'unique_sentence2': 2133}, 'cat-eng': {'num_samples': 1152, 'number_of_characters': 977495, 'unique_pairs': 1152, 'min_sentence1_length': 64, 'average_sentence1_length': 437.25, 'max_sentence1_length': 8113, 'unique_sentence1': 1152, 'min_sentence2_length': 59, 'average_sentence2_length': 411.27, 'max_sentence2_length': 7400, 'unique_sentence2': 1152}, 'ces-eng': {'num_samples': 3775, 'number_of_characters': 2651016, 'unique_pairs': 3775, 'min_sentence1_length': 54, 'average_sentence1_length': 347.59, 'max_sentence1_length': 2349, 'unique_sentence1': 3775, 'min_sentence2_length': 60, 'average_sentence2_length': 354.66, 'max_sentence2_length': 2401, 'unique_sentence2': 3775}, 'dan-eng': {'num_samples': 4512, 'number_of_characters': 3404958, 'unique_pairs': 4512, 'min_sentence1_length': 49, 'average_sentence1_length': 381.45, 'max_sentence1_length': 15317, 'unique_sentence1': 4512, 'min_sentence2_length': 56, 'average_sentence2_length': 373.19, 'max_sentence2_length': 14749, 'unique_sentence2': 4512}, 'deu-eng': {'num_samples': 37348, 'number_of_characters': 29811894, 'unique_pairs': 37348, 'min_sentence1_length': 53, 'average_sentence1_length': 423.63, 'max_sentence1_length': 6437, 'unique_sentence1': 37348, 'min_sentence2_length': 46, 'average_sentence2_length': 374.59, 'max_sentence2_length': 5781, 'unique_sentence2': 37348}, 'ell-eng': {'num_samples': 2790, 'number_of_characters': 2021300, 'unique_pairs': 2790, 'min_sentence1_length': 66, 'average_sentence1_length': 394.65, 'max_sentence1_length': 2963, 'unique_sentence1': 2790, 'min_sentence2_length': 61, 'average_sentence2_length': 329.83, 'max_sentence2_length': 3013, 'unique_sentence2': 2790}, 'eng-est': {'num_samples': 755, 'number_of_characters': 528417, 'unique_pairs': 755, 'min_sentence1_length': 58, 'average_sentence1_length': 352.58, 'max_sentence1_length': 1567, 'unique_sentence1': 755, 'min_sentence2_length': 62, 'average_sentence2_length': 347.31, 'max_sentence2_length': 1630, 'unique_sentence2': 755}, 'eng-fas': {'num_samples': 556, 'number_of_characters': 431685, 'unique_pairs': 556, 'min_sentence1_length': 59, 'average_sentence1_length': 396.65, 'max_sentence1_length': 5339, 'unique_sentence1': 556, 'min_sentence2_length': 68, 'average_sentence2_length': 379.76, 'max_sentence2_length': 4782, 'unique_sentence2': 556}, 'eng-fin': {'num_samples': 3443, 'number_of_characters': 2505517, 'unique_pairs': 3443, 'min_sentence1_length': 62, 'average_sentence1_length': 359.76, 'max_sentence1_length': 4412, 'unique_sentence1': 3443, 'min_sentence2_length': 57, 'average_sentence2_length': 367.96, 'max_sentence2_length': 4583, 'unique_sentence2': 3443}, 'eng-fra': {'num_samples': 37208, 'number_of_characters': 30609932, 'unique_pairs': 37208, 'min_sentence1_length': 52, 'average_sentence1_length': 375.68, 'max_sentence1_length': 14463, 'unique_sentence1': 37208, 'min_sentence2_length': 59, 'average_sentence2_length': 446.99, 'max_sentence2_length': 15312, 'unique_sentence2': 37208}, 'eng-heb': {'num_samples': 882, 'number_of_characters': 541517, 'unique_pairs': 882, 'min_sentence1_length': 80, 'average_sentence1_length': 339.58, 'max_sentence1_length': 16651, 'unique_sentence1': 882, 'min_sentence2_length': 62, 'average_sentence2_length': 274.39, 'max_sentence2_length': 14483, 'unique_sentence2': 882}, 'eng-hin': {'num_samples': 2219, 'number_of_characters': 1277126, 'unique_pairs': 2219, 'min_sentence1_length': 59, 'average_sentence1_length': 284.61, 'max_sentence1_length': 2439, 'unique_sentence1': 2219, 'min_sentence2_length': 50, 'average_sentence2_length': 290.93, 'max_sentence2_length': 2496, 'unique_sentence2': 2219}, 'eng-hrv': {'num_samples': 336, 'number_of_characters': 247780, 'unique_pairs': 336, 'min_sentence1_length': 79, 'average_sentence1_length': 370.68, 'max_sentence1_length': 1657, 'unique_sentence1': 336, 'min_sentence2_length': 59, 'average_sentence2_length': 366.76, 'max_sentence2_length': 1393, 'unique_sentence2': 336}, 'eng-hun': {'num_samples': 2185, 'number_of_characters': 1517498, 'unique_pairs': 2185, 'min_sentence1_length': 61, 'average_sentence1_length': 334.86, 'max_sentence1_length': 1664, 'unique_sentence1': 2185, 'min_sentence2_length': 55, 'average_sentence2_length': 359.65, 'max_sentence2_length': 1814, 'unique_sentence2': 2185}, 'eng-ind': {'num_samples': 3454, 'number_of_characters': 2493124, 'unique_pairs': 3454, 'min_sentence1_length': 60, 'average_sentence1_length': 344.73, 'max_sentence1_length': 4253, 'unique_sentence1': 3454, 'min_sentence2_length': 67, 'average_sentence2_length': 377.07, 'max_sentence2_length': 4132, 'unique_sentence2': 3454}, 'eng-isl': {'num_samples': 358, 'number_of_characters': 237004, 'unique_pairs': 358, 'min_sentence1_length': 72, 'average_sentence1_length': 329.61, 'max_sentence1_length': 1112, 'unique_sentence1': 358, 'min_sentence2_length': 69, 'average_sentence2_length': 332.41, 'max_sentence2_length': 1206, 'unique_sentence2': 358}, 'eng-ita': {'num_samples': 19661, 'number_of_characters': 15893506, 'unique_pairs': 19661, 'min_sentence1_length': 54, 'average_sentence1_length': 381.62, 'max_sentence1_length': 14540, 'unique_sentence1': 19661, 'min_sentence2_length': 49, 'average_sentence2_length': 426.75, 'max_sentence2_length': 16311, 'unique_sentence2': 19661}, 'eng-jpn': {'num_samples': 3807, 'number_of_characters': 1890484, 'unique_pairs': 3807, 'min_sentence1_length': 66, 'average_sentence1_length': 323.35, 'max_sentence1_length': 1738, 'unique_sentence1': 3807, 'min_sentence2_length': 47, 'average_sentence2_length': 173.23, 'max_sentence2_length': 856, 'unique_sentence2': 3807}, 'eng-kaz': {'num_samples': 346, 'number_of_characters': 245791, 'unique_pairs': 346, 'min_sentence1_length': 72, 'average_sentence1_length': 349.87, 'max_sentence1_length': 2126, 'unique_sentence1': 346, 'min_sentence2_length': 79, 'average_sentence2_length': 360.51, 'max_sentence2_length': 2052, 'unique_sentence2': 346}, 'eng-kor': {'num_samples': 2558, 'number_of_characters': 1358932, 'unique_pairs': 2558, 'min_sentence1_length': 58, 'average_sentence1_length': 342.45, 'max_sentence1_length': 11054, 'unique_sentence1': 2558, 'min_sentence2_length': 47, 'average_sentence2_length': 188.79, 'max_sentence2_length': 6212, 'unique_sentence2': 2558}, 'eng-lav': {'num_samples': 1079, 'number_of_characters': 765748, 'unique_pairs': 1079, 'min_sentence1_length': 63, 'average_sentence1_length': 352.18, 'max_sentence1_length': 3960, 'unique_sentence1': 1079, 'min_sentence2_length': 65, 'average_sentence2_length': 357.5, 'max_sentence2_length': 3996, 'unique_sentence2': 1079}, 'eng-lit': {'num_samples': 1185, 'number_of_characters': 884963, 'unique_pairs': 1185, 'min_sentence1_length': 70, 'average_sentence1_length': 369.4, 'max_sentence1_length': 3955, 'unique_sentence1': 1185, 'min_sentence2_length': 87, 'average_sentence2_length': 377.4, 'max_sentence2_length': 3841, 'unique_sentence2': 1185}, 'eng-mar': {'num_samples': 280, 'number_of_characters': 153207, 'unique_pairs': 280, 'min_sentence1_length': 65, 'average_sentence1_length': 274.54, 'max_sentence1_length': 883, 'unique_sentence1': 280, 'min_sentence2_length': 57, 'average_sentence2_length': 272.62, 'max_sentence2_length': 943, 'unique_sentence2': 280}, 'eng-msa': {'num_samples': 469, 'number_of_characters': 205989, 'unique_pairs': 469, 'min_sentence1_length': 58, 'average_sentence1_length': 209.64, 'max_sentence1_length': 1842, 'unique_sentence1': 469, 'min_sentence2_length': 58, 'average_sentence2_length': 229.57, 'max_sentence2_length': 2082, 'unique_sentence2': 469}, 'eng-nld': {'num_samples': 15613, 'number_of_characters': 12313146, 'unique_pairs': 15613, 'min_sentence1_length': 55, 'average_sentence1_length': 378.03, 'max_sentence1_length': 15297, 'unique_sentence1': 15613, 'min_sentence2_length': 54, 'average_sentence2_length': 410.62, 'max_sentence2_length': 16485, 'unique_sentence2': 15613}, 'eng-nor': {'num_samples': 2666, 'number_of_characters': 1883809, 'unique_pairs': 2666, 'min_sentence1_length': 55, 'average_sentence1_length': 353.08, 'max_sentence1_length': 2834, 'unique_sentence1': 2666, 'min_sentence2_length': 56, 'average_sentence2_length': 353.53, 'max_sentence2_length': 2795, 'unique_sentence2': 2666}, 'eng-pol': {'num_samples': 6868, 'number_of_characters': 4946440, 'unique_pairs': 6868, 'min_sentence1_length': 54, 'average_sentence1_length': 349.29, 'max_sentence1_length': 4412, 'unique_sentence1': 6868, 'min_sentence2_length': 57, 'average_sentence2_length': 370.92, 'max_sentence2_length': 5103, 'unique_sentence2': 6868}, 'eng-por': {'num_samples': 12406, 'number_of_characters': 9040635, 'unique_pairs': 12406, 'min_sentence1_length': 58, 'average_sentence1_length': 347.94, 'max_sentence1_length': 6643, 'unique_sentence1': 12406, 'min_sentence2_length': 56, 'average_sentence2_length': 380.79, 'max_sentence2_length': 7445, 'unique_sentence2': 12406}, 'eng-ron': {'num_samples': 3039, 'number_of_characters': 2119434, 'unique_pairs': 3039, 'min_sentence1_length': 61, 'average_sentence1_length': 328.93, 'max_sentence1_length': 2085, 'unique_sentence1': 3039, 'min_sentence2_length': 70, 'average_sentence2_length': 368.48, 'max_sentence2_length': 2421, 'unique_sentence2': 3039}, 'eng-rus': {'num_samples': 9360, 'number_of_characters': 6547558, 'unique_pairs': 9360, 'min_sentence1_length': 54, 'average_sentence1_length': 340.47, 'max_sentence1_length': 3382, 'unique_sentence1': 9360, 'min_sentence2_length': 56, 'average_sentence2_length': 359.06, 'max_sentence2_length': 4018, 'unique_sentence2': 9360}, 'eng-slk': {'num_samples': 1823, 'number_of_characters': 1287409, 'unique_pairs': 1823, 'min_sentence1_length': 64, 'average_sentence1_length': 353.21, 'max_sentence1_length': 4412, 'unique_sentence1': 1823, 'min_sentence2_length': 68, 'average_sentence2_length': 352.99, 'max_sentence2_length': 4641, 'unique_sentence2': 1823}, 'eng-slv': {'num_samples': 1450, 'number_of_characters': 948874, 'unique_pairs': 1450, 'min_sentence1_length': 61, 'average_sentence1_length': 327.63, 'max_sentence1_length': 2058, 'unique_sentence1': 1450, 'min_sentence2_length': 59, 'average_sentence2_length': 326.77, 'max_sentence2_length': 2049, 'unique_sentence2': 1450}, 'eng-spa': {'num_samples': 35446, 'number_of_characters': 29514190, 'unique_pairs': 35446, 'min_sentence1_length': 52, 'average_sentence1_length': 391.45, 'max_sentence1_length': 16651, 'unique_sentence1': 35446, 'min_sentence2_length': 47, 'average_sentence2_length': 441.2, 'max_sentence2_length': 21081, 'unique_sentence2': 35446}, 'eng-srp': {'num_samples': 303, 'number_of_characters': 196853, 'unique_pairs': 303, 'min_sentence1_length': 85, 'average_sentence1_length': 323.19, 'max_sentence1_length': 1054, 'unique_sentence1': 303, 'min_sentence2_length': 78, 'average_sentence2_length': 326.49, 'max_sentence2_length': 1025, 'unique_sentence2': 303}, 'eng-swe': {'num_samples': 6005, 'number_of_characters': 4500597, 'unique_pairs': 6005, 'min_sentence1_length': 54, 'average_sentence1_length': 373.82, 'max_sentence1_length': 14540, 'unique_sentence1': 6005, 'min_sentence2_length': 56, 'average_sentence2_length': 375.65, 'max_sentence2_length': 15020, 'unique_sentence2': 6005}, 'eng-tgl': {'num_samples': 551, 'number_of_characters': 426303, 'unique_pairs': 551, 'min_sentence1_length': 61, 'average_sentence1_length': 346.09, 'max_sentence1_length': 1958, 'unique_sentence1': 551, 'min_sentence2_length': 57, 'average_sentence2_length': 427.6, 'max_sentence2_length': 2422, 'unique_sentence2': 551}, 'eng-tha': {'num_samples': 814, 'number_of_characters': 426960, 'unique_pairs': 814, 'min_sentence1_length': 67, 'average_sentence1_length': 274.76, 'max_sentence1_length': 1842, 'unique_sentence1': 814, 'min_sentence2_length': 52, 'average_sentence2_length': 249.76, 'max_sentence2_length': 1669, 'unique_sentence2': 814}, 'eng-tur': {'num_samples': 4606, 'number_of_characters': 3315390, 'unique_pairs': 4606, 'min_sentence1_length': 58, 'average_sentence1_length': 353.38, 'max_sentence1_length': 5595, 'unique_sentence1': 4606, 'min_sentence2_length': 58, 'average_sentence2_length': 366.42, 'max_sentence2_length': 6024, 'unique_sentence2': 4606}, 'eng-ukr': {'num_samples': 3778, 'number_of_characters': 2803318, 'unique_pairs': 3778, 'min_sentence1_length': 59, 'average_sentence1_length': 367.93, 'max_sentence1_length': 3627, 'unique_sentence1': 3778, 'min_sentence2_length': 49, 'average_sentence2_length': 374.08, 'max_sentence2_length': 3991, 'unique_sentence2': 3778}, 'eng-urd': {'num_samples': 268, 'number_of_characters': 137458, 'unique_pairs': 268, 'min_sentence1_length': 67, 'average_sentence1_length': 253.83, 'max_sentence1_length': 736, 'unique_sentence1': 268, 'min_sentence2_length': 57, 'average_sentence2_length': 259.07, 'max_sentence2_length': 795, 'unique_sentence2': 268}, 'eng-vie': {'num_samples': 1264, 'number_of_characters': 866332, 'unique_pairs': 1264, 'min_sentence1_length': 60, 'average_sentence1_length': 330.71, 'max_sentence1_length': 4253, 'unique_sentence1': 1264, 'min_sentence2_length': 59, 'average_sentence2_length': 354.68, 'max_sentence2_length': 3780, 'unique_sentence2': 1264}, 'eng-zho': {'num_samples': 4959, 'number_of_characters': 2696879, 'unique_pairs': 4959, 'min_sentence1_length': 75, 'average_sentence1_length': 402.38, 'max_sentence1_length': 14540, 'unique_sentence1': 4959, 'min_sentence2_length': 41, 'average_sentence2_length': 141.46, 'max_sentence2_length': 4500, 'unique_sentence2': 4959}}}}
WebFAQBitextMiningQuestions (Michael Dinzinger, 2025) ['ara', 'aze', 'ben', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fas', 'fin', 'fra', 'heb', 'hin', 'hrv', 'hun', 'ind', 'isl', 'ita', 'jpn', 'kat', 'kaz', 'kor', 'lav', 'lit', 'mar', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'srp', 'swe', 'tgl', 'tha', 'tur', 'ukr', 'urd', 'vie', 'zho'] BitextMining s2s [Web, Written] {'default': 682057} {'default': {'num_samples': 682057, 'number_of_characters': 71984237, 'unique_pairs': 681597, 'min_sentence1_length': 6, 'average_sentence1_length': 52.72, 'max_sentence1_length': 847, 'unique_sentence1': 379086, 'min_sentence2_length': 6, 'average_sentence2_length': 52.82, 'max_sentence2_length': 773, 'unique_sentence2': 398743, 'hf_subset_descriptive_stats': {'ara-fas': {'num_samples': 609, 'number_of_characters': 53319, 'unique_pairs': 609, 'min_sentence1_length': 10, 'average_sentence1_length': 41.9, 'max_sentence1_length': 164, 'unique_sentence1': 609, 'min_sentence2_length': 11, 'average_sentence2_length': 45.65, 'max_sentence2_length': 177, 'unique_sentence2': 609}, 'ara-heb': {'num_samples': 978, 'number_of_characters': 93188, 'unique_pairs': 978, 'min_sentence1_length': 10, 'average_sentence1_length': 48.34, 'max_sentence1_length': 158, 'unique_sentence1': 978, 'min_sentence2_length': 9, 'average_sentence2_length': 46.95, 'max_sentence2_length': 160, 'unique_sentence2': 978}, 'jpn-kor': {'num_samples': 4820, 'number_of_characters': 310427, 'unique_pairs': 4820, 'min_sentence1_length': 10, 'average_sentence1_length': 31.91, 'max_sentence1_length': 146, 'unique_sentence1': 4820, 'min_sentence2_length': 7, 'average_sentence2_length': 32.5, 'max_sentence2_length': 208, 'unique_sentence2': 4820}, 'jpn-vie': {'num_samples': 1356, 'number_of_characters': 110198, 'unique_pairs': 1356, 'min_sentence1_length': 10, 'average_sentence1_length': 28.55, 'max_sentence1_length': 152, 'unique_sentence1': 1356, 'min_sentence2_length': 10, 'average_sentence2_length': 52.72, 'max_sentence2_length': 344, 'unique_sentence2': 1356}, 'jpn-zho': {'num_samples': 1728, 'number_of_characters': 86936, 'unique_pairs': 1728, 'min_sentence1_length': 7, 'average_sentence1_length': 29.03, 'max_sentence1_length': 135, 'unique_sentence1': 1728, 'min_sentence2_length': 8, 'average_sentence2_length': 21.28, 'max_sentence2_length': 126, 'unique_sentence2': 1728}, 'kor-vie': {'num_samples': 1386, 'number_of_characters': 116716, 'unique_pairs': 1386, 'min_sentence1_length': 10, 'average_sentence1_length': 30.34, 'max_sentence1_length': 108, 'unique_sentence1': 1386, 'min_sentence2_length': 10, 'average_sentence2_length': 53.87, 'max_sentence2_length': 176, 'unique_sentence2': 1386}, 'kor-zho': {'num_samples': 1087, 'number_of_characters': 56152, 'unique_pairs': 1087, 'min_sentence1_length': 10, 'average_sentence1_length': 30.24, 'max_sentence1_length': 109, 'unique_sentence1': 1087, 'min_sentence2_length': 10, 'average_sentence2_length': 21.42, 'max_sentence2_length': 95, 'unique_sentence2': 1087}, 'vie-zho': {'num_samples': 646, 'number_of_characters': 46981, 'unique_pairs': 646, 'min_sentence1_length': 11, 'average_sentence1_length': 52.13, 'max_sentence1_length': 258, 'unique_sentence1': 646, 'min_sentence2_length': 10, 'average_sentence2_length': 20.6, 'max_sentence2_length': 78, 'unique_sentence2': 646}, 'ind-msa': {'num_samples': 455, 'number_of_characters': 61802, 'unique_pairs': 455, 'min_sentence1_length': 17, 'average_sentence1_length': 69.13, 'max_sentence1_length': 135, 'unique_sentence1': 455, 'min_sentence2_length': 16, 'average_sentence2_length': 66.7, 'max_sentence2_length': 156, 'unique_sentence2': 455}, 'ind-tgl': {'num_samples': 378, 'number_of_characters': 40452, 'unique_pairs': 378, 'min_sentence1_length': 13, 'average_sentence1_length': 51.3, 'max_sentence1_length': 122, 'unique_sentence1': 378, 'min_sentence2_length': 13, 'average_sentence2_length': 55.71, 'max_sentence2_length': 136, 'unique_sentence2': 378}, 'ind-tha': {'num_samples': 1258, 'number_of_characters': 129949, 'unique_pairs': 1258, 'min_sentence1_length': 11, 'average_sentence1_length': 56.66, 'max_sentence1_length': 159, 'unique_sentence1': 1258, 'min_sentence2_length': 11, 'average_sentence2_length': 46.64, 'max_sentence2_length': 153, 'unique_sentence2': 1258}, 'bul-ces': {'num_samples': 1485, 'number_of_characters': 156647, 'unique_pairs': 1485, 'min_sentence1_length': 12, 'average_sentence1_length': 55.73, 'max_sentence1_length': 273, 'unique_sentence1': 1485, 'min_sentence2_length': 10, 'average_sentence2_length': 49.75, 'max_sentence2_length': 265, 'unique_sentence2': 1485}, 'bul-lav': {'num_samples': 710, 'number_of_characters': 72767, 'unique_pairs': 710, 'min_sentence1_length': 14, 'average_sentence1_length': 52.81, 'max_sentence1_length': 169, 'unique_sentence1': 710, 'min_sentence2_length': 13, 'average_sentence2_length': 49.68, 'max_sentence2_length': 137, 'unique_sentence2': 710}, 'bul-lit': {'num_samples': 803, 'number_of_characters': 83819, 'unique_pairs': 803, 'min_sentence1_length': 14, 'average_sentence1_length': 53.81, 'max_sentence1_length': 223, 'unique_sentence1': 803, 'min_sentence2_length': 14, 'average_sentence2_length': 50.57, 'max_sentence2_length': 188, 'unique_sentence2': 803}, 'bul-pol': {'num_samples': 1635, 'number_of_characters': 182803, 'unique_pairs': 1635, 'min_sentence1_length': 12, 'average_sentence1_length': 56.71, 'max_sentence1_length': 224, 'unique_sentence1': 1635, 'min_sentence2_length': 14, 'average_sentence2_length': 55.1, 'max_sentence2_length': 223, 'unique_sentence2': 1635}, 'bul-rus': {'num_samples': 1476, 'number_of_characters': 171755, 'unique_pairs': 1476, 'min_sentence1_length': 11, 'average_sentence1_length': 58.61, 'max_sentence1_length': 273, 'unique_sentence1': 1476, 'min_sentence2_length': 13, 'average_sentence2_length': 57.75, 'max_sentence2_length': 263, 'unique_sentence2': 1476}, 'bul-slk': {'num_samples': 1154, 'number_of_characters': 122062, 'unique_pairs': 1154, 'min_sentence1_length': 14, 'average_sentence1_length': 55.47, 'max_sentence1_length': 273, 'unique_sentence1': 1154, 'min_sentence2_length': 12, 'average_sentence2_length': 50.31, 'max_sentence2_length': 234, 'unique_sentence2': 1154}, 'bul-slv': {'num_samples': 1034, 'number_of_characters': 120376, 'unique_pairs': 1034, 'min_sentence1_length': 13, 'average_sentence1_length': 60.39, 'max_sentence1_length': 273, 'unique_sentence1': 1034, 'min_sentence2_length': 12, 'average_sentence2_length': 56.03, 'max_sentence2_length': 252, 'unique_sentence2': 1034}, 'bul-srp': {'num_samples': 296, 'number_of_characters': 29879, 'unique_pairs': 296, 'min_sentence1_length': 17, 'average_sentence1_length': 51.28, 'max_sentence1_length': 146, 'unique_sentence1': 296, 'min_sentence2_length': 10, 'average_sentence2_length': 49.66, 'max_sentence2_length': 118, 'unique_sentence2': 296}, 'bul-ukr': {'num_samples': 1074, 'number_of_characters': 120520, 'unique_pairs': 1074, 'min_sentence1_length': 11, 'average_sentence1_length': 57.26, 'max_sentence1_length': 155, 'unique_sentence1': 1074, 'min_sentence2_length': 12, 'average_sentence2_length': 54.96, 'max_sentence2_length': 152, 'unique_sentence2': 1074}, 'ces-lav': {'num_samples': 875, 'number_of_characters': 81949, 'unique_pairs': 875, 'min_sentence1_length': 12, 'average_sentence1_length': 45.45, 'max_sentence1_length': 133, 'unique_sentence1': 875, 'min_sentence2_length': 13, 'average_sentence2_length': 48.2, 'max_sentence2_length': 151, 'unique_sentence2': 875}, 'ces-lit': {'num_samples': 1002, 'number_of_characters': 95191, 'unique_pairs': 1002, 'min_sentence1_length': 10, 'average_sentence1_length': 45.99, 'max_sentence1_length': 129, 'unique_sentence1': 1002, 'min_sentence2_length': 12, 'average_sentence2_length': 49.01, 'max_sentence2_length': 177, 'unique_sentence2': 1002}, 'ces-pol': {'num_samples': 3367, 'number_of_characters': 335068, 'unique_pairs': 3367, 'min_sentence1_length': 10, 'average_sentence1_length': 47.42, 'max_sentence1_length': 211, 'unique_sentence1': 3367, 'min_sentence2_length': 13, 'average_sentence2_length': 52.09, 'max_sentence2_length': 231, 'unique_sentence2': 3367}, 'ces-rus': {'num_samples': 2144, 'number_of_characters': 221574, 'unique_pairs': 2144, 'min_sentence1_length': 10, 'average_sentence1_length': 48.92, 'max_sentence1_length': 265, 'unique_sentence1': 2144, 'min_sentence2_length': 13, 'average_sentence2_length': 54.42, 'max_sentence2_length': 263, 'unique_sentence2': 2144}, 'ces-slk': {'num_samples': 2551, 'number_of_characters': 253025, 'unique_pairs': 2551, 'min_sentence1_length': 10, 'average_sentence1_length': 48.91, 'max_sentence1_length': 265, 'unique_sentence1': 2551, 'min_sentence2_length': 10, 'average_sentence2_length': 50.28, 'max_sentence2_length': 237, 'unique_sentence2': 2551}, 'ces-slv': {'num_samples': 1370, 'number_of_characters': 145413, 'unique_pairs': 1370, 'min_sentence1_length': 10, 'average_sentence1_length': 51.01, 'max_sentence1_length': 265, 'unique_sentence1': 1370, 'min_sentence2_length': 11, 'average_sentence2_length': 55.13, 'max_sentence2_length': 252, 'unique_sentence2': 1370}, 'ces-srp': {'num_samples': 362, 'number_of_characters': 37517, 'unique_pairs': 362, 'min_sentence1_length': 15, 'average_sentence1_length': 49.99, 'max_sentence1_length': 129, 'unique_sentence1': 362, 'min_sentence2_length': 16, 'average_sentence2_length': 53.65, 'max_sentence2_length': 129, 'unique_sentence2': 362}, 'ces-ukr': {'num_samples': 1285, 'number_of_characters': 133762, 'unique_pairs': 1285, 'min_sentence1_length': 10, 'average_sentence1_length': 49.67, 'max_sentence1_length': 233, 'unique_sentence1': 1285, 'min_sentence2_length': 12, 'average_sentence2_length': 54.42, 'max_sentence2_length': 253, 'unique_sentence2': 1285}, 'hrv-slk': {'num_samples': 313, 'number_of_characters': 38510, 'unique_pairs': 313, 'min_sentence1_length': 16, 'average_sentence1_length': 61.02, 'max_sentence1_length': 174, 'unique_sentence1': 313, 'min_sentence2_length': 15, 'average_sentence2_length': 62.02, 'max_sentence2_length': 182, 'unique_sentence2': 313}, 'kat-rus': {'num_samples': 262, 'number_of_characters': 26840, 'unique_pairs': 262, 'min_sentence1_length': 14, 'average_sentence1_length': 51.6, 'max_sentence1_length': 179, 'unique_sentence1': 262, 'min_sentence2_length': 16, 'average_sentence2_length': 50.85, 'max_sentence2_length': 186, 'unique_sentence2': 262}, 'lav-lit': {'num_samples': 1061, 'number_of_characters': 101845, 'unique_pairs': 1061, 'min_sentence1_length': 11, 'average_sentence1_length': 47.87, 'max_sentence1_length': 283, 'unique_sentence1': 1061, 'min_sentence2_length': 12, 'average_sentence2_length': 48.12, 'max_sentence2_length': 268, 'unique_sentence2': 1061}, 'lav-pol': {'num_samples': 951, 'number_of_characters': 94163, 'unique_pairs': 951, 'min_sentence1_length': 11, 'average_sentence1_length': 48.29, 'max_sentence1_length': 172, 'unique_sentence1': 951, 'min_sentence2_length': 14, 'average_sentence2_length': 50.72, 'max_sentence2_length': 208, 'unique_sentence2': 951}, 'lav-rus': {'num_samples': 1412, 'number_of_characters': 141609, 'unique_pairs': 1412, 'min_sentence1_length': 11, 'average_sentence1_length': 49.2, 'max_sentence1_length': 146, 'unique_sentence1': 1412, 'min_sentence2_length': 13, 'average_sentence2_length': 51.09, 'max_sentence2_length': 182, 'unique_sentence2': 1412}, 'lav-slk': {'num_samples': 789, 'number_of_characters': 72654, 'unique_pairs': 789, 'min_sentence1_length': 13, 'average_sentence1_length': 46.75, 'max_sentence1_length': 172, 'unique_sentence1': 789, 'min_sentence2_length': 12, 'average_sentence2_length': 45.33, 'max_sentence2_length': 162, 'unique_sentence2': 789}, 'lav-slv': {'num_samples': 518, 'number_of_characters': 48873, 'unique_pairs': 518, 'min_sentence1_length': 13, 'average_sentence1_length': 46.73, 'max_sentence1_length': 126, 'unique_sentence1': 518, 'min_sentence2_length': 13, 'average_sentence2_length': 47.62, 'max_sentence2_length': 125, 'unique_sentence2': 518}, 'lav-ukr': {'num_samples': 579, 'number_of_characters': 52695, 'unique_pairs': 579, 'min_sentence1_length': 11, 'average_sentence1_length': 45.8, 'max_sentence1_length': 151, 'unique_sentence1': 579, 'min_sentence2_length': 12, 'average_sentence2_length': 45.21, 'max_sentence2_length': 170, 'unique_sentence2': 579}, 'lit-pol': {'num_samples': 1026, 'number_of_characters': 102814, 'unique_pairs': 1026, 'min_sentence1_length': 13, 'average_sentence1_length': 48.94, 'max_sentence1_length': 188, 'unique_sentence1': 1026, 'min_sentence2_length': 15, 'average_sentence2_length': 51.27, 'max_sentence2_length': 206, 'unique_sentence2': 1026}, 'lit-rus': {'num_samples': 961, 'number_of_characters': 96822, 'unique_pairs': 961, 'min_sentence1_length': 12, 'average_sentence1_length': 49.5, 'max_sentence1_length': 155, 'unique_sentence1': 961, 'min_sentence2_length': 12, 'average_sentence2_length': 51.25, 'max_sentence2_length': 182, 'unique_sentence2': 961}, 'lit-slk': {'num_samples': 859, 'number_of_characters': 82605, 'unique_pairs': 859, 'min_sentence1_length': 12, 'average_sentence1_length': 49.03, 'max_sentence1_length': 188, 'unique_sentence1': 859, 'min_sentence2_length': 10, 'average_sentence2_length': 47.13, 'max_sentence2_length': 195, 'unique_sentence2': 859}, 'lit-slv': {'num_samples': 607, 'number_of_characters': 58755, 'unique_pairs': 607, 'min_sentence1_length': 12, 'average_sentence1_length': 48.06, 'max_sentence1_length': 155, 'unique_sentence1': 607, 'min_sentence2_length': 11, 'average_sentence2_length': 48.74, 'max_sentence2_length': 145, 'unique_sentence2': 607}, 'lit-ukr': {'num_samples': 639, 'number_of_characters': 59450, 'unique_pairs': 639, 'min_sentence1_length': 14, 'average_sentence1_length': 46.94, 'max_sentence1_length': 149, 'unique_sentence1': 639, 'min_sentence2_length': 13, 'average_sentence2_length': 46.1, 'max_sentence2_length': 170, 'unique_sentence2': 639}, 'pol-rus': {'num_samples': 5014, 'number_of_characters': 536093, 'unique_pairs': 5014, 'min_sentence1_length': 12, 'average_sentence1_length': 53.29, 'max_sentence1_length': 424, 'unique_sentence1': 5014, 'min_sentence2_length': 11, 'average_sentence2_length': 53.63, 'max_sentence2_length': 456, 'unique_sentence2': 5014}, 'pol-slk': {'num_samples': 1918, 'number_of_characters': 196496, 'unique_pairs': 1918, 'min_sentence1_length': 14, 'average_sentence1_length': 53.03, 'max_sentence1_length': 206, 'unique_sentence1': 1918, 'min_sentence2_length': 10, 'average_sentence2_length': 49.42, 'max_sentence2_length': 194, 'unique_sentence2': 1918}, 'pol-slv': {'num_samples': 1382, 'number_of_characters': 160396, 'unique_pairs': 1382, 'min_sentence1_length': 9, 'average_sentence1_length': 57.59, 'max_sentence1_length': 180, 'unique_sentence1': 1382, 'min_sentence2_length': 8, 'average_sentence2_length': 58.47, 'max_sentence2_length': 146, 'unique_sentence2': 1382}, 'pol-srp': {'num_samples': 492, 'number_of_characters': 54772, 'unique_pairs': 492, 'min_sentence1_length': 19, 'average_sentence1_length': 54.55, 'max_sentence1_length': 148, 'unique_sentence1': 492, 'min_sentence2_length': 16, 'average_sentence2_length': 56.77, 'max_sentence2_length': 129, 'unique_sentence2': 492}, 'pol-ukr': {'num_samples': 2370, 'number_of_characters': 251424, 'unique_pairs': 2370, 'min_sentence1_length': 14, 'average_sentence1_length': 53.8, 'max_sentence1_length': 305, 'unique_sentence1': 2370, 'min_sentence2_length': 12, 'average_sentence2_length': 52.29, 'max_sentence2_length': 202, 'unique_sentence2': 2370}, 'rus-slk': {'num_samples': 1263, 'number_of_characters': 136750, 'unique_pairs': 1263, 'min_sentence1_length': 13, 'average_sentence1_length': 56.65, 'max_sentence1_length': 316, 'unique_sentence1': 1263, 'min_sentence2_length': 10, 'average_sentence2_length': 51.63, 'max_sentence2_length': 309, 'unique_sentence2': 1263}, 'rus-slv': {'num_samples': 1096, 'number_of_characters': 133263, 'unique_pairs': 1096, 'min_sentence1_length': 14, 'average_sentence1_length': 62.56, 'max_sentence1_length': 263, 'unique_sentence1': 1096, 'min_sentence2_length': 11, 'average_sentence2_length': 59.03, 'max_sentence2_length': 252, 'unique_sentence2': 1096}, 'rus-srp': {'num_samples': 455, 'number_of_characters': 51800, 'unique_pairs': 455, 'min_sentence1_length': 17, 'average_sentence1_length': 57.52, 'max_sentence1_length': 122, 'unique_sentence1': 455, 'min_sentence2_length': 12, 'average_sentence2_length': 56.33, 'max_sentence2_length': 129, 'unique_sentence2': 455}, 'rus-ukr': {'num_samples': 15251, 'number_of_characters': 1504266, 'unique_pairs': 15251, 'min_sentence1_length': 10, 'average_sentence1_length': 49.98, 'max_sentence1_length': 308, 'unique_sentence1': 15251, 'min_sentence2_length': 10, 'average_sentence2_length': 48.66, 'max_sentence2_length': 353, 'unique_sentence2': 15251}, 'slk-slv': {'num_samples': 1259, 'number_of_characters': 133621, 'unique_pairs': 1259, 'min_sentence1_length': 10, 'average_sentence1_length': 50.42, 'max_sentence1_length': 234, 'unique_sentence1': 1259, 'min_sentence2_length': 11, 'average_sentence2_length': 55.71, 'max_sentence2_length': 252, 'unique_sentence2': 1259}, 'slk-srp': {'num_samples': 561, 'number_of_characters': 57637, 'unique_pairs': 561, 'min_sentence1_length': 15, 'average_sentence1_length': 47.93, 'max_sentence1_length': 117, 'unique_sentence1': 561, 'min_sentence2_length': 16, 'average_sentence2_length': 54.81, 'max_sentence2_length': 112, 'unique_sentence2': 561}, 'slk-ukr': {'num_samples': 944, 'number_of_characters': 90612, 'unique_pairs': 944, 'min_sentence1_length': 10, 'average_sentence1_length': 46.87, 'max_sentence1_length': 237, 'unique_sentence1': 944, 'min_sentence2_length': 12, 'average_sentence2_length': 49.11, 'max_sentence2_length': 253, 'unique_sentence2': 944}, 'slv-srp': {'num_samples': 499, 'number_of_characters': 60828, 'unique_pairs': 499, 'min_sentence1_length': 16, 'average_sentence1_length': 63.64, 'max_sentence1_length': 122, 'unique_sentence1': 499, 'min_sentence2_length': 16, 'average_sentence2_length': 58.26, 'max_sentence2_length': 118, 'unique_sentence2': 499}, 'slv-ukr': {'num_samples': 733, 'number_of_characters': 86586, 'unique_pairs': 733, 'min_sentence1_length': 11, 'average_sentence1_length': 57.88, 'max_sentence1_length': 124, 'unique_sentence1': 733, 'min_sentence2_length': 12, 'average_sentence2_length': 60.24, 'max_sentence2_length': 132, 'unique_sentence2': 733}, 'cat-deu': {'num_samples': 302, 'number_of_characters': 33752, 'unique_pairs': 302, 'min_sentence1_length': 13, 'average_sentence1_length': 53.35, 'max_sentence1_length': 255, 'unique_sentence1': 302, 'min_sentence2_length': 12, 'average_sentence2_length': 58.41, 'max_sentence2_length': 269, 'unique_sentence2': 302}, 'cat-fra': {'num_samples': 598, 'number_of_characters': 69263, 'unique_pairs': 598, 'min_sentence1_length': 14, 'average_sentence1_length': 53.4, 'max_sentence1_length': 202, 'unique_sentence1': 598, 'min_sentence2_length': 16, 'average_sentence2_length': 62.42, 'max_sentence2_length': 235, 'unique_sentence2': 598}, 'cat-ita': {'num_samples': 418, 'number_of_characters': 43550, 'unique_pairs': 418, 'min_sentence1_length': 15, 'average_sentence1_length': 50.67, 'max_sentence1_length': 195, 'unique_sentence1': 418, 'min_sentence2_length': 15, 'average_sentence2_length': 53.51, 'max_sentence2_length': 230, 'unique_sentence2': 418}, 'cat-por': {'num_samples': 370, 'number_of_characters': 36411, 'unique_pairs': 370, 'min_sentence1_length': 15, 'average_sentence1_length': 48.72, 'max_sentence1_length': 193, 'unique_sentence1': 370, 'min_sentence2_length': 15, 'average_sentence2_length': 49.69, 'max_sentence2_length': 211, 'unique_sentence2': 370}, 'cat-spa': {'num_samples': 2648, 'number_of_characters': 285897, 'unique_pairs': 2648, 'min_sentence1_length': 10, 'average_sentence1_length': 52.98, 'max_sentence1_length': 216, 'unique_sentence1': 2648, 'min_sentence2_length': 11, 'average_sentence2_length': 54.98, 'max_sentence2_length': 247, 'unique_sentence2': 2648}, 'dan-deu': {'num_samples': 4337, 'number_of_characters': 463434, 'unique_pairs': 4337, 'min_sentence1_length': 11, 'average_sentence1_length': 50.84, 'max_sentence1_length': 198, 'unique_sentence1': 4337, 'min_sentence2_length': 11, 'average_sentence2_length': 56.02, 'max_sentence2_length': 224, 'unique_sentence2': 4337}, 'dan-fra': {'num_samples': 3802, 'number_of_characters': 434788, 'unique_pairs': 3802, 'min_sentence1_length': 12, 'average_sentence1_length': 51.92, 'max_sentence1_length': 296, 'unique_sentence1': 3802, 'min_sentence2_length': 11, 'average_sentence2_length': 62.44, 'max_sentence2_length': 345, 'unique_sentence2': 3802}, 'dan-isl': {'num_samples': 327, 'number_of_characters': 32853, 'unique_pairs': 327, 'min_sentence1_length': 12, 'average_sentence1_length': 51.35, 'max_sentence1_length': 198, 'unique_sentence1': 327, 'min_sentence2_length': 12, 'average_sentence2_length': 49.11, 'max_sentence2_length': 181, 'unique_sentence2': 327}, 'dan-ita': {'num_samples': 3818, 'number_of_characters': 421045, 'unique_pairs': 3818, 'min_sentence1_length': 12, 'average_sentence1_length': 53.2, 'max_sentence1_length': 296, 'unique_sentence1': 3818, 'min_sentence2_length': 11, 'average_sentence2_length': 57.08, 'max_sentence2_length': 271, 'unique_sentence2': 3818}, 'dan-nld': {'num_samples': 4099, 'number_of_characters': 428737, 'unique_pairs': 4099, 'min_sentence1_length': 11, 'average_sentence1_length': 51.53, 'max_sentence1_length': 225, 'unique_sentence1': 4099, 'min_sentence2_length': 10, 'average_sentence2_length': 53.06, 'max_sentence2_length': 254, 'unique_sentence2': 4099}, 'dan-nor': {'num_samples': 2603, 'number_of_characters': 278953, 'unique_pairs': 2603, 'min_sentence1_length': 13, 'average_sentence1_length': 54.74, 'max_sentence1_length': 296, 'unique_sentence1': 2603, 'min_sentence2_length': 11, 'average_sentence2_length': 52.42, 'max_sentence2_length': 291, 'unique_sentence2': 2603}, 'dan-por': {'num_samples': 3206, 'number_of_characters': 349267, 'unique_pairs': 3206, 'min_sentence1_length': 12, 'average_sentence1_length': 52.78, 'max_sentence1_length': 176, 'unique_sentence1': 3206, 'min_sentence2_length': 10, 'average_sentence2_length': 56.16, 'max_sentence2_length': 211, 'unique_sentence2': 3206}, 'dan-ron': {'num_samples': 2052, 'number_of_characters': 225425, 'unique_pairs': 2052, 'min_sentence1_length': 8, 'average_sentence1_length': 53.82, 'max_sentence1_length': 296, 'unique_sentence1': 2052, 'min_sentence2_length': 9, 'average_sentence2_length': 56.03, 'max_sentence2_length': 281, 'unique_sentence2': 2052}, 'dan-spa': {'num_samples': 3571, 'number_of_characters': 389048, 'unique_pairs': 3571, 'min_sentence1_length': 12, 'average_sentence1_length': 52.06, 'max_sentence1_length': 257, 'unique_sentence1': 3571, 'min_sentence2_length': 12, 'average_sentence2_length': 56.89, 'max_sentence2_length': 239, 'unique_sentence2': 3571}, 'dan-swe': {'num_samples': 4268, 'number_of_characters': 440347, 'unique_pairs': 4268, 'min_sentence1_length': 12, 'average_sentence1_length': 52.09, 'max_sentence1_length': 315, 'unique_sentence1': 4268, 'min_sentence2_length': 11, 'average_sentence2_length': 51.09, 'max_sentence2_length': 330, 'unique_sentence2': 4268}, 'deu-fra': {'num_samples': 27727, 'number_of_characters': 3222716, 'unique_pairs': 27727, 'min_sentence1_length': 6, 'average_sentence1_length': 55.56, 'max_sentence1_length': 337, 'unique_sentence1': 27727, 'min_sentence2_length': 6, 'average_sentence2_length': 60.67, 'max_sentence2_length': 330, 'unique_sentence2': 27727}, 'deu-isl': {'num_samples': 294, 'number_of_characters': 31097, 'unique_pairs': 294, 'min_sentence1_length': 14, 'average_sentence1_length': 56.09, 'max_sentence1_length': 201, 'unique_sentence1': 294, 'min_sentence2_length': 14, 'average_sentence2_length': 49.69, 'max_sentence2_length': 181, 'unique_sentence2': 294}, 'deu-ita': {'num_samples': 18787, 'number_of_characters': 2100285, 'unique_pairs': 18787, 'min_sentence1_length': 10, 'average_sentence1_length': 56.25, 'max_sentence1_length': 346, 'unique_sentence1': 18787, 'min_sentence2_length': 10, 'average_sentence2_length': 55.54, 'max_sentence2_length': 357, 'unique_sentence2': 18787}, 'deu-nld': {'num_samples': 14211, 'number_of_characters': 1508365, 'unique_pairs': 14211, 'min_sentence1_length': 10, 'average_sentence1_length': 54.51, 'max_sentence1_length': 299, 'unique_sentence1': 14211, 'min_sentence2_length': 10, 'average_sentence2_length': 51.63, 'max_sentence2_length': 276, 'unique_sentence2': 14211}, 'deu-nor': {'num_samples': 2783, 'number_of_characters': 295406, 'unique_pairs': 2783, 'min_sentence1_length': 10, 'average_sentence1_length': 56.26, 'max_sentence1_length': 224, 'unique_sentence1': 2783, 'min_sentence2_length': 11, 'average_sentence2_length': 49.89, 'max_sentence2_length': 163, 'unique_sentence2': 2783}, 'deu-por': {'num_samples': 11319, 'number_of_characters': 1213812, 'unique_pairs': 11319, 'min_sentence1_length': 9, 'average_sentence1_length': 54.7, 'max_sentence1_length': 374, 'unique_sentence1': 11319, 'min_sentence2_length': 10, 'average_sentence2_length': 52.54, 'max_sentence2_length': 337, 'unique_sentence2': 11319}, 'deu-ron': {'num_samples': 3598, 'number_of_characters': 401116, 'unique_pairs': 3598, 'min_sentence1_length': 8, 'average_sentence1_length': 57.01, 'max_sentence1_length': 716, 'unique_sentence1': 3598, 'min_sentence2_length': 9, 'average_sentence2_length': 54.48, 'max_sentence2_length': 699, 'unique_sentence2': 3598}, 'deu-spa': {'num_samples': 19739, 'number_of_characters': 2151487, 'unique_pairs': 19739, 'min_sentence1_length': 9, 'average_sentence1_length': 54.84, 'max_sentence1_length': 428, 'unique_sentence1': 19739, 'min_sentence2_length': 10, 'average_sentence2_length': 54.16, 'max_sentence2_length': 415, 'unique_sentence2': 19739}, 'deu-swe': {'num_samples': 5772, 'number_of_characters': 610949, 'unique_pairs': 5772, 'min_sentence1_length': 10, 'average_sentence1_length': 55.78, 'max_sentence1_length': 264, 'unique_sentence1': 5772, 'min_sentence2_length': 10, 'average_sentence2_length': 50.06, 'max_sentence2_length': 237, 'unique_sentence2': 5772}, 'fra-isl': {'num_samples': 347, 'number_of_characters': 39368, 'unique_pairs': 347, 'min_sentence1_length': 14, 'average_sentence1_length': 62.12, 'max_sentence1_length': 229, 'unique_sentence1': 347, 'min_sentence2_length': 16, 'average_sentence2_length': 51.34, 'max_sentence2_length': 194, 'unique_sentence2': 347}, 'fra-ita': {'num_samples': 20002, 'number_of_characters': 2326983, 'unique_pairs': 20002, 'min_sentence1_length': 10, 'average_sentence1_length': 61.12, 'max_sentence1_length': 743, 'unique_sentence1': 20002, 'min_sentence2_length': 8, 'average_sentence2_length': 55.22, 'max_sentence2_length': 653, 'unique_sentence2': 20002}, 'fra-nld': {'num_samples': 14684, 'number_of_characters': 1659621, 'unique_pairs': 14684, 'min_sentence1_length': 10, 'average_sentence1_length': 60.34, 'max_sentence1_length': 400, 'unique_sentence1': 14684, 'min_sentence2_length': 10, 'average_sentence2_length': 52.69, 'max_sentence2_length': 263, 'unique_sentence2': 14684}, 'fra-nor': {'num_samples': 2558, 'number_of_characters': 290540, 'unique_pairs': 2558, 'min_sentence1_length': 10, 'average_sentence1_length': 63.31, 'max_sentence1_length': 377, 'unique_sentence1': 2558, 'min_sentence2_length': 11, 'average_sentence2_length': 50.27, 'max_sentence2_length': 305, 'unique_sentence2': 2558}, 'fra-por': {'num_samples': 13265, 'number_of_characters': 1485292, 'unique_pairs': 13265, 'min_sentence1_length': 10, 'average_sentence1_length': 59.58, 'max_sentence1_length': 284, 'unique_sentence1': 13265, 'min_sentence2_length': 8, 'average_sentence2_length': 52.39, 'max_sentence2_length': 443, 'unique_sentence2': 13265}, 'fra-ron': {'num_samples': 3295, 'number_of_characters': 381273, 'unique_pairs': 3295, 'min_sentence1_length': 8, 'average_sentence1_length': 61.62, 'max_sentence1_length': 677, 'unique_sentence1': 3295, 'min_sentence2_length': 8, 'average_sentence2_length': 54.1, 'max_sentence2_length': 587, 'unique_sentence2': 3295}, 'fra-spa': {'num_samples': 23311, 'number_of_characters': 2650610, 'unique_pairs': 23311, 'min_sentence1_length': 10, 'average_sentence1_length': 59.63, 'max_sentence1_length': 677, 'unique_sentence1': 23311, 'min_sentence2_length': 8, 'average_sentence2_length': 54.08, 'max_sentence2_length': 576, 'unique_sentence2': 23311}, 'fra-swe': {'num_samples': 5006, 'number_of_characters': 565299, 'unique_pairs': 5006, 'min_sentence1_length': 10, 'average_sentence1_length': 62.23, 'max_sentence1_length': 345, 'unique_sentence1': 5006, 'min_sentence2_length': 11, 'average_sentence2_length': 50.7, 'max_sentence2_length': 289, 'unique_sentence2': 5006}, 'isl-ita': {'num_samples': 421, 'number_of_characters': 42563, 'unique_pairs': 421, 'min_sentence1_length': 14, 'average_sentence1_length': 48.12, 'max_sentence1_length': 145, 'unique_sentence1': 421, 'min_sentence2_length': 13, 'average_sentence2_length': 52.98, 'max_sentence2_length': 151, 'unique_sentence2': 421}, 'isl-nld': {'num_samples': 311, 'number_of_characters': 33292, 'unique_pairs': 311, 'min_sentence1_length': 13, 'average_sentence1_length': 51.5, 'max_sentence1_length': 160, 'unique_sentence1': 311, 'min_sentence2_length': 11, 'average_sentence2_length': 55.55, 'max_sentence2_length': 228, 'unique_sentence2': 311}, 'isl-por': {'num_samples': 341, 'number_of_characters': 34367, 'unique_pairs': 341, 'min_sentence1_length': 12, 'average_sentence1_length': 48.83, 'max_sentence1_length': 135, 'unique_sentence1': 341, 'min_sentence2_length': 12, 'average_sentence2_length': 51.96, 'max_sentence2_length': 156, 'unique_sentence2': 341}, 'isl-spa': {'num_samples': 366, 'number_of_characters': 38563, 'unique_pairs': 366, 'min_sentence1_length': 14, 'average_sentence1_length': 49.8, 'max_sentence1_length': 196, 'unique_sentence1': 366, 'min_sentence2_length': 17, 'average_sentence2_length': 55.56, 'max_sentence2_length': 206, 'unique_sentence2': 366}, 'isl-swe': {'num_samples': 312, 'number_of_characters': 30662, 'unique_pairs': 312, 'min_sentence1_length': 14, 'average_sentence1_length': 48.84, 'max_sentence1_length': 149, 'unique_sentence1': 312, 'min_sentence2_length': 15, 'average_sentence2_length': 49.44, 'max_sentence2_length': 141, 'unique_sentence2': 312}, 'ita-nld': {'num_samples': 9160, 'number_of_characters': 973718, 'unique_pairs': 9160, 'min_sentence1_length': 11, 'average_sentence1_length': 54.2, 'max_sentence1_length': 291, 'unique_sentence1': 9160, 'min_sentence2_length': 10, 'average_sentence2_length': 52.1, 'max_sentence2_length': 337, 'unique_sentence2': 9160}, 'ita-nor': {'num_samples': 2516, 'number_of_characters': 267838, 'unique_pairs': 2516, 'min_sentence1_length': 13, 'average_sentence1_length': 56.18, 'max_sentence1_length': 356, 'unique_sentence1': 2516, 'min_sentence2_length': 12, 'average_sentence2_length': 50.28, 'max_sentence2_length': 305, 'unique_sentence2': 2516}, 'ita-por': {'num_samples': 10924, 'number_of_characters': 1157982, 'unique_pairs': 10924, 'min_sentence1_length': 10, 'average_sentence1_length': 53.5, 'max_sentence1_length': 357, 'unique_sentence1': 10924, 'min_sentence2_length': 10, 'average_sentence2_length': 52.5, 'max_sentence2_length': 276, 'unique_sentence2': 10924}, 'ita-ron': {'num_samples': 3360, 'number_of_characters': 375697, 'unique_pairs': 3360, 'min_sentence1_length': 8, 'average_sentence1_length': 56.38, 'max_sentence1_length': 346, 'unique_sentence1': 3360, 'min_sentence2_length': 9, 'average_sentence2_length': 55.43, 'max_sentence2_length': 402, 'unique_sentence2': 3360}, 'ita-spa': {'num_samples': 16534, 'number_of_characters': 1778757, 'unique_pairs': 16534, 'min_sentence1_length': 10, 'average_sentence1_length': 53.56, 'max_sentence1_length': 362, 'unique_sentence1': 16534, 'min_sentence2_length': 11, 'average_sentence2_length': 54.02, 'max_sentence2_length': 363, 'unique_sentence2': 16534}, 'ita-swe': {'num_samples': 4741, 'number_of_characters': 508653, 'unique_pairs': 4741, 'min_sentence1_length': 10, 'average_sentence1_length': 55.87, 'max_sentence1_length': 271, 'unique_sentence1': 4741, 'min_sentence2_length': 10, 'average_sentence2_length': 51.42, 'max_sentence2_length': 289, 'unique_sentence2': 4741}, 'nld-nor': {'num_samples': 2664, 'number_of_characters': 281584, 'unique_pairs': 2664, 'min_sentence1_length': 10, 'average_sentence1_length': 54.62, 'max_sentence1_length': 211, 'unique_sentence1': 2664, 'min_sentence2_length': 11, 'average_sentence2_length': 51.08, 'max_sentence2_length': 162, 'unique_sentence2': 2664}, 'nld-por': {'num_samples': 7021, 'number_of_characters': 738280, 'unique_pairs': 7021, 'min_sentence1_length': 10, 'average_sentence1_length': 51.91, 'max_sentence1_length': 243, 'unique_sentence1': 7021, 'min_sentence2_length': 11, 'average_sentence2_length': 53.24, 'max_sentence2_length': 443, 'unique_sentence2': 7021}, 'nld-ron': {'num_samples': 2888, 'number_of_characters': 311689, 'unique_pairs': 2888, 'min_sentence1_length': 10, 'average_sentence1_length': 53.16, 'max_sentence1_length': 228, 'unique_sentence1': 2888, 'min_sentence2_length': 11, 'average_sentence2_length': 54.77, 'max_sentence2_length': 252, 'unique_sentence2': 2888}, 'nld-spa': {'num_samples': 9555, 'number_of_characters': 1015756, 'unique_pairs': 9555, 'min_sentence1_length': 10, 'average_sentence1_length': 51.91, 'max_sentence1_length': 229, 'unique_sentence1': 9555, 'min_sentence2_length': 10, 'average_sentence2_length': 54.39, 'max_sentence2_length': 235, 'unique_sentence2': 9555}, 'nld-swe': {'num_samples': 5072, 'number_of_characters': 529162, 'unique_pairs': 5072, 'min_sentence1_length': 10, 'average_sentence1_length': 53.53, 'max_sentence1_length': 333, 'unique_sentence1': 5072, 'min_sentence2_length': 10, 'average_sentence2_length': 50.8, 'max_sentence2_length': 311, 'unique_sentence2': 5072}, 'nor-por': {'num_samples': 2096, 'number_of_characters': 227941, 'unique_pairs': 2096, 'min_sentence1_length': 11, 'average_sentence1_length': 51.43, 'max_sentence1_length': 200, 'unique_sentence1': 2096, 'min_sentence2_length': 12, 'average_sentence2_length': 57.32, 'max_sentence2_length': 180, 'unique_sentence2': 2096}, 'nor-ron': {'num_samples': 1412, 'number_of_characters': 156087, 'unique_pairs': 1412, 'min_sentence1_length': 13, 'average_sentence1_length': 52.16, 'max_sentence1_length': 291, 'unique_sentence1': 1412, 'min_sentence2_length': 14, 'average_sentence2_length': 58.39, 'max_sentence2_length': 281, 'unique_sentence2': 1412}, 'nor-spa': {'num_samples': 2603, 'number_of_characters': 283540, 'unique_pairs': 2603, 'min_sentence1_length': 12, 'average_sentence1_length': 51.0, 'max_sentence1_length': 283, 'unique_sentence1': 2603, 'min_sentence2_length': 13, 'average_sentence2_length': 57.93, 'max_sentence2_length': 274, 'unique_sentence2': 2603}, 'nor-swe': {'num_samples': 3165, 'number_of_characters': 325198, 'unique_pairs': 3165, 'min_sentence1_length': 11, 'average_sentence1_length': 50.94, 'max_sentence1_length': 291, 'unique_sentence1': 3165, 'min_sentence2_length': 10, 'average_sentence2_length': 51.81, 'max_sentence2_length': 289, 'unique_sentence2': 3165}, 'por-ron': {'num_samples': 3026, 'number_of_characters': 341012, 'unique_pairs': 3026, 'min_sentence1_length': 12, 'average_sentence1_length': 56.57, 'max_sentence1_length': 334, 'unique_sentence1': 3026, 'min_sentence2_length': 12, 'average_sentence2_length': 56.13, 'max_sentence2_length': 402, 'unique_sentence2': 3026}, 'por-spa': {'num_samples': 16084, 'number_of_characters': 1698580, 'unique_pairs': 16084, 'min_sentence1_length': 10, 'average_sentence1_length': 51.93, 'max_sentence1_length': 350, 'unique_sentence1': 16084, 'min_sentence2_length': 10, 'average_sentence2_length': 53.68, 'max_sentence2_length': 364, 'unique_sentence2': 16084}, 'por-swe': {'num_samples': 4235, 'number_of_characters': 444824, 'unique_pairs': 4235, 'min_sentence1_length': 11, 'average_sentence1_length': 54.48, 'max_sentence1_length': 282, 'unique_sentence1': 4235, 'min_sentence2_length': 11, 'average_sentence2_length': 50.55, 'max_sentence2_length': 255, 'unique_sentence2': 4235}, 'ron-spa': {'num_samples': 3375, 'number_of_characters': 376497, 'unique_pairs': 3375, 'min_sentence1_length': 12, 'average_sentence1_length': 54.71, 'max_sentence1_length': 587, 'unique_sentence1': 3375, 'min_sentence2_length': 11, 'average_sentence2_length': 56.85, 'max_sentence2_length': 576, 'unique_sentence2': 3375}, 'ron-swe': {'num_samples': 2154, 'number_of_characters': 238390, 'unique_pairs': 2154, 'min_sentence1_length': 9, 'average_sentence1_length': 57.04, 'max_sentence1_length': 281, 'unique_sentence1': 2154, 'min_sentence2_length': 8, 'average_sentence2_length': 53.64, 'max_sentence2_length': 289, 'unique_sentence2': 2154}, 'spa-swe': {'num_samples': 4884, 'number_of_characters': 525651, 'unique_pairs': 4884, 'min_sentence1_length': 12, 'average_sentence1_length': 56.58, 'max_sentence1_length': 244, 'unique_sentence1': 4884, 'min_sentence2_length': 10, 'average_sentence2_length': 51.04, 'max_sentence2_length': 280, 'unique_sentence2': 4884}, 'ben-hin': {'num_samples': 1174, 'number_of_characters': 115288, 'unique_pairs': 1174, 'min_sentence1_length': 11, 'average_sentence1_length': 47.5, 'max_sentence1_length': 145, 'unique_sentence1': 1174, 'min_sentence2_length': 13, 'average_sentence2_length': 50.71, 'max_sentence2_length': 149, 'unique_sentence2': 1174}, 'ben-mar': {'num_samples': 566, 'number_of_characters': 54106, 'unique_pairs': 566, 'min_sentence1_length': 12, 'average_sentence1_length': 47.8, 'max_sentence1_length': 145, 'unique_sentence1': 566, 'min_sentence2_length': 14, 'average_sentence2_length': 47.8, 'max_sentence2_length': 130, 'unique_sentence2': 566}, 'ben-urd': {'num_samples': 488, 'number_of_characters': 44387, 'unique_pairs': 488, 'min_sentence1_length': 11, 'average_sentence1_length': 44.0, 'max_sentence1_length': 110, 'unique_sentence1': 488, 'min_sentence2_length': 12, 'average_sentence2_length': 46.96, 'max_sentence2_length': 101, 'unique_sentence2': 488}, 'hin-mar': {'num_samples': 615, 'number_of_characters': 59136, 'unique_pairs': 615, 'min_sentence1_length': 11, 'average_sentence1_length': 49.57, 'max_sentence1_length': 149, 'unique_sentence1': 615, 'min_sentence2_length': 11, 'average_sentence2_length': 46.59, 'max_sentence2_length': 143, 'unique_sentence2': 615}, 'hin-urd': {'num_samples': 545, 'number_of_characters': 52165, 'unique_pairs': 545, 'min_sentence1_length': 14, 'average_sentence1_length': 48.38, 'max_sentence1_length': 111, 'unique_sentence1': 545, 'min_sentence2_length': 12, 'average_sentence2_length': 47.34, 'max_sentence2_length': 125, 'unique_sentence2': 545}, 'mar-urd': {'num_samples': 270, 'number_of_characters': 23951, 'unique_pairs': 270, 'min_sentence1_length': 14, 'average_sentence1_length': 43.16, 'max_sentence1_length': 108, 'unique_sentence1': 270, 'min_sentence2_length': 13, 'average_sentence2_length': 45.54, 'max_sentence2_length': 107, 'unique_sentence2': 270}, 'aze-kaz': {'num_samples': 412, 'number_of_characters': 38912, 'unique_pairs': 412, 'min_sentence1_length': 12, 'average_sentence1_length': 46.7, 'max_sentence1_length': 121, 'unique_sentence1': 412, 'min_sentence2_length': 14, 'average_sentence2_length': 47.75, 'max_sentence2_length': 108, 'unique_sentence2': 412}, 'aze-tur': {'num_samples': 388, 'number_of_characters': 36138, 'unique_pairs': 388, 'min_sentence1_length': 12, 'average_sentence1_length': 46.69, 'max_sentence1_length': 124, 'unique_sentence1': 388, 'min_sentence2_length': 12, 'average_sentence2_length': 46.45, 'max_sentence2_length': 123, 'unique_sentence2': 388}, 'kaz-tur': {'num_samples': 340, 'number_of_characters': 31637, 'unique_pairs': 340, 'min_sentence1_length': 17, 'average_sentence1_length': 47.27, 'max_sentence1_length': 122, 'unique_sentence1': 340, 'min_sentence2_length': 12, 'average_sentence2_length': 45.78, 'max_sentence2_length': 114, 'unique_sentence2': 340}, 'est-fin': {'num_samples': 790, 'number_of_characters': 80226, 'unique_pairs': 790, 'min_sentence1_length': 13, 'average_sentence1_length': 50.35, 'max_sentence1_length': 158, 'unique_sentence1': 790, 'min_sentence2_length': 14, 'average_sentence2_length': 51.21, 'max_sentence2_length': 152, 'unique_sentence2': 790}, 'est-hun': {'num_samples': 674, 'number_of_characters': 69641, 'unique_pairs': 674, 'min_sentence1_length': 8, 'average_sentence1_length': 50.49, 'max_sentence1_length': 157, 'unique_sentence1': 674, 'min_sentence2_length': 9, 'average_sentence2_length': 52.83, 'max_sentence2_length': 180, 'unique_sentence2': 674}, 'fin-hun': {'num_samples': 1542, 'number_of_characters': 167588, 'unique_pairs': 1542, 'min_sentence1_length': 8, 'average_sentence1_length': 53.55, 'max_sentence1_length': 243, 'unique_sentence1': 1542, 'min_sentence2_length': 9, 'average_sentence2_length': 55.13, 'max_sentence2_length': 228, 'unique_sentence2': 1542}, 'ara-eng': {'num_samples': 5698, 'number_of_characters': 544132, 'unique_pairs': 5698, 'min_sentence1_length': 10, 'average_sentence1_length': 45.67, 'max_sentence1_length': 280, 'unique_sentence1': 5698, 'min_sentence2_length': 11, 'average_sentence2_length': 49.82, 'max_sentence2_length': 287, 'unique_sentence2': 5698}, 'aze-eng': {'num_samples': 603, 'number_of_characters': 58907, 'unique_pairs': 603, 'min_sentence1_length': 12, 'average_sentence1_length': 49.94, 'max_sentence1_length': 121, 'unique_sentence1': 603, 'min_sentence2_length': 12, 'average_sentence2_length': 47.75, 'max_sentence2_length': 129, 'unique_sentence2': 603}, 'ben-eng': {'num_samples': 1367, 'number_of_characters': 126399, 'unique_pairs': 1367, 'min_sentence1_length': 10, 'average_sentence1_length': 46.61, 'max_sentence1_length': 147, 'unique_sentence1': 1367, 'min_sentence2_length': 14, 'average_sentence2_length': 45.85, 'max_sentence2_length': 148, 'unique_sentence2': 1367}, 'bul-eng': {'num_samples': 2133, 'number_of_characters': 219893, 'unique_pairs': 2133, 'min_sentence1_length': 12, 'average_sentence1_length': 53.71, 'max_sentence1_length': 273, 'unique_sentence1': 2133, 'min_sentence2_length': 11, 'average_sentence2_length': 49.38, 'max_sentence2_length': 287, 'unique_sentence2': 2133}, 'cat-eng': {'num_samples': 1152, 'number_of_characters': 118852, 'unique_pairs': 1152, 'min_sentence1_length': 10, 'average_sentence1_length': 52.34, 'max_sentence1_length': 202, 'unique_sentence1': 1152, 'min_sentence2_length': 11, 'average_sentence2_length': 50.83, 'max_sentence2_length': 195, 'unique_sentence2': 1152}, 'ces-eng': {'num_samples': 3775, 'number_of_characters': 364386, 'unique_pairs': 3775, 'min_sentence1_length': 10, 'average_sentence1_length': 47.79, 'max_sentence1_length': 265, 'unique_sentence1': 3775, 'min_sentence2_length': 10, 'average_sentence2_length': 48.74, 'max_sentence2_length': 287, 'unique_sentence2': 3775}, 'dan-eng': {'num_samples': 4512, 'number_of_characters': 451232, 'unique_pairs': 4512, 'min_sentence1_length': 10, 'average_sentence1_length': 51.15, 'max_sentence1_length': 296, 'unique_sentence1': 4512, 'min_sentence2_length': 10, 'average_sentence2_length': 48.86, 'max_sentence2_length': 287, 'unique_sentence2': 4512}, 'deu-eng': {'num_samples': 37348, 'number_of_characters': 3890899, 'unique_pairs': 37348, 'min_sentence1_length': 8, 'average_sentence1_length': 55.15, 'max_sentence1_length': 716, 'unique_sentence1': 37348, 'min_sentence2_length': 9, 'average_sentence2_length': 49.03, 'max_sentence2_length': 586, 'unique_sentence2': 37348}, 'ell-eng': {'num_samples': 2790, 'number_of_characters': 302459, 'unique_pairs': 2790, 'min_sentence1_length': 14, 'average_sentence1_length': 58.78, 'max_sentence1_length': 286, 'unique_sentence1': 2790, 'min_sentence2_length': 11, 'average_sentence2_length': 49.63, 'max_sentence2_length': 243, 'unique_sentence2': 2790}, 'eng-est': {'num_samples': 755, 'number_of_characters': 75289, 'unique_pairs': 755, 'min_sentence1_length': 12, 'average_sentence1_length': 49.1, 'max_sentence1_length': 152, 'unique_sentence1': 755, 'min_sentence2_length': 15, 'average_sentence2_length': 50.62, 'max_sentence2_length': 160, 'unique_sentence2': 755}, 'eng-fas': {'num_samples': 556, 'number_of_characters': 52628, 'unique_pairs': 556, 'min_sentence1_length': 12, 'average_sentence1_length': 48.16, 'max_sentence1_length': 184, 'unique_sentence1': 556, 'min_sentence2_length': 10, 'average_sentence2_length': 46.49, 'max_sentence2_length': 161, 'unique_sentence2': 556}, 'eng-fin': {'num_samples': 3443, 'number_of_characters': 348436, 'unique_pairs': 3443, 'min_sentence1_length': 11, 'average_sentence1_length': 49.68, 'max_sentence1_length': 410, 'unique_sentence1': 3443, 'min_sentence2_length': 11, 'average_sentence2_length': 51.52, 'max_sentence2_length': 387, 'unique_sentence2': 3443}, 'eng-fra': {'num_samples': 37208, 'number_of_characters': 4091513, 'unique_pairs': 37208, 'min_sentence1_length': 8, 'average_sentence1_length': 49.11, 'max_sentence1_length': 414, 'unique_sentence1': 37208, 'min_sentence2_length': 10, 'average_sentence2_length': 60.86, 'max_sentence2_length': 428, 'unique_sentence2': 37208}, 'eng-heb': {'num_samples': 882, 'number_of_characters': 88397, 'unique_pairs': 882, 'min_sentence1_length': 13, 'average_sentence1_length': 54.84, 'max_sentence1_length': 193, 'unique_sentence1': 882, 'min_sentence2_length': 10, 'average_sentence2_length': 45.38, 'max_sentence2_length': 162, 'unique_sentence2': 882}, 'eng-hin': {'num_samples': 2219, 'number_of_characters': 227451, 'unique_pairs': 2219, 'min_sentence1_length': 12, 'average_sentence1_length': 49.35, 'max_sentence1_length': 222, 'unique_sentence1': 2219, 'min_sentence2_length': 12, 'average_sentence2_length': 53.15, 'max_sentence2_length': 281, 'unique_sentence2': 2219}, 'eng-hrv': {'num_samples': 336, 'number_of_characters': 35675, 'unique_pairs': 336, 'min_sentence1_length': 11, 'average_sentence1_length': 52.62, 'max_sentence1_length': 264, 'unique_sentence1': 336, 'min_sentence2_length': 16, 'average_sentence2_length': 53.56, 'max_sentence2_length': 258, 'unique_sentence2': 336}, 'eng-hun': {'num_samples': 2185, 'number_of_characters': 225323, 'unique_pairs': 2185, 'min_sentence1_length': 8, 'average_sentence1_length': 50.01, 'max_sentence1_length': 238, 'unique_sentence1': 2185, 'min_sentence2_length': 9, 'average_sentence2_length': 53.11, 'max_sentence2_length': 233, 'unique_sentence2': 2185}, 'eng-ind': {'num_samples': 3454, 'number_of_characters': 364799, 'unique_pairs': 3454, 'min_sentence1_length': 11, 'average_sentence1_length': 49.18, 'max_sentence1_length': 222, 'unique_sentence1': 3454, 'min_sentence2_length': 11, 'average_sentence2_length': 56.43, 'max_sentence2_length': 245, 'unique_sentence2': 3454}, 'eng-isl': {'num_samples': 358, 'number_of_characters': 33431, 'unique_pairs': 358, 'min_sentence1_length': 13, 'average_sentence1_length': 46.72, 'max_sentence1_length': 136, 'unique_sentence1': 358, 'min_sentence2_length': 13, 'average_sentence2_length': 46.66, 'max_sentence2_length': 122, 'unique_sentence2': 358}, 'eng-ita': {'num_samples': 19661, 'number_of_characters': 2063797, 'unique_pairs': 19661, 'min_sentence1_length': 10, 'average_sentence1_length': 49.52, 'max_sentence1_length': 365, 'unique_sentence1': 19661, 'min_sentence2_length': 10, 'average_sentence2_length': 55.45, 'max_sentence2_length': 362, 'unique_sentence2': 19661}, 'eng-jpn': {'num_samples': 3807, 'number_of_characters': 318641, 'unique_pairs': 3807, 'min_sentence1_length': 12, 'average_sentence1_length': 51.45, 'max_sentence1_length': 743, 'unique_sentence1': 3807, 'min_sentence2_length': 10, 'average_sentence2_length': 32.25, 'max_sentence2_length': 414, 'unique_sentence2': 3807}, 'eng-kaz': {'num_samples': 346, 'number_of_characters': 32798, 'unique_pairs': 346, 'min_sentence1_length': 15, 'average_sentence1_length': 45.54, 'max_sentence1_length': 110, 'unique_sentence1': 346, 'min_sentence2_length': 18, 'average_sentence2_length': 49.25, 'max_sentence2_length': 122, 'unique_sentence2': 346}, 'eng-kor': {'num_samples': 2558, 'number_of_characters': 217654, 'unique_pairs': 2558, 'min_sentence1_length': 12, 'average_sentence1_length': 51.78, 'max_sentence1_length': 252, 'unique_sentence1': 2558, 'min_sentence2_length': 10, 'average_sentence2_length': 33.31, 'max_sentence2_length': 225, 'unique_sentence2': 2558}, 'eng-lav': {'num_samples': 1079, 'number_of_characters': 103672, 'unique_pairs': 1079, 'min_sentence1_length': 12, 'average_sentence1_length': 47.47, 'max_sentence1_length': 165, 'unique_sentence1': 1079, 'min_sentence2_length': 11, 'average_sentence2_length': 48.61, 'max_sentence2_length': 151, 'unique_sentence2': 1079}, 'eng-lit': {'num_samples': 1185, 'number_of_characters': 113428, 'unique_pairs': 1185, 'min_sentence1_length': 12, 'average_sentence1_length': 47.12, 'max_sentence1_length': 175, 'unique_sentence1': 1185, 'min_sentence2_length': 12, 'average_sentence2_length': 48.6, 'max_sentence2_length': 167, 'unique_sentence2': 1185}, 'eng-mar': {'num_samples': 280, 'number_of_characters': 26641, 'unique_pairs': 280, 'min_sentence1_length': 14, 'average_sentence1_length': 47.66, 'max_sentence1_length': 148, 'unique_sentence1': 280, 'min_sentence2_length': 14, 'average_sentence2_length': 47.49, 'max_sentence2_length': 143, 'unique_sentence2': 280}, 'eng-msa': {'num_samples': 469, 'number_of_characters': 59862, 'unique_pairs': 469, 'min_sentence1_length': 15, 'average_sentence1_length': 61.27, 'max_sentence1_length': 163, 'unique_sentence1': 469, 'min_sentence2_length': 14, 'average_sentence2_length': 66.37, 'max_sentence2_length': 170, 'unique_sentence2': 469}, 'eng-nld': {'num_samples': 15613, 'number_of_characters': 1555910, 'unique_pairs': 15613, 'min_sentence1_length': 10, 'average_sentence1_length': 47.86, 'max_sentence1_length': 312, 'unique_sentence1': 15613, 'min_sentence2_length': 10, 'average_sentence2_length': 51.79, 'max_sentence2_length': 337, 'unique_sentence2': 15613}, 'eng-nor': {'num_samples': 2666, 'number_of_characters': 263032, 'unique_pairs': 2666, 'min_sentence1_length': 11, 'average_sentence1_length': 49.04, 'max_sentence1_length': 287, 'unique_sentence1': 2666, 'min_sentence2_length': 11, 'average_sentence2_length': 49.63, 'max_sentence2_length': 291, 'unique_sentence2': 2666}, 'eng-pol': {'num_samples': 6868, 'number_of_characters': 682796, 'unique_pairs': 6868, 'min_sentence1_length': 8, 'average_sentence1_length': 47.91, 'max_sentence1_length': 313, 'unique_sentence1': 6868, 'min_sentence2_length': 9, 'average_sentence2_length': 51.51, 'max_sentence2_length': 325, 'unique_sentence2': 6868}, 'eng-por': {'num_samples': 12406, 'number_of_characters': 1237980, 'unique_pairs': 12406, 'min_sentence1_length': 10, 'average_sentence1_length': 47.42, 'max_sentence1_length': 743, 'unique_sentence1': 12406, 'min_sentence2_length': 10, 'average_sentence2_length': 52.37, 'max_sentence2_length': 761, 'unique_sentence2': 12406}, 'eng-ron': {'num_samples': 3039, 'number_of_characters': 322632, 'unique_pairs': 3039, 'min_sentence1_length': 8, 'average_sentence1_length': 50.61, 'max_sentence1_length': 586, 'unique_sentence1': 3039, 'min_sentence2_length': 9, 'average_sentence2_length': 55.55, 'max_sentence2_length': 699, 'unique_sentence2': 3039}, 'eng-rus': {'num_samples': 9360, 'number_of_characters': 983153, 'unique_pairs': 9360, 'min_sentence1_length': 10, 'average_sentence1_length': 51.39, 'max_sentence1_length': 607, 'unique_sentence1': 9360, 'min_sentence2_length': 10, 'average_sentence2_length': 53.65, 'max_sentence2_length': 773, 'unique_sentence2': 9360}, 'eng-slk': {'num_samples': 1823, 'number_of_characters': 178649, 'unique_pairs': 1823, 'min_sentence1_length': 12, 'average_sentence1_length': 49.06, 'max_sentence1_length': 637, 'unique_sentence1': 1823, 'min_sentence2_length': 10, 'average_sentence2_length': 48.94, 'max_sentence2_length': 597, 'unique_sentence2': 1823}, 'eng-slv': {'num_samples': 1450, 'number_of_characters': 151964, 'unique_pairs': 1450, 'min_sentence1_length': 12, 'average_sentence1_length': 51.28, 'max_sentence1_length': 287, 'unique_sentence1': 1450, 'min_sentence2_length': 11, 'average_sentence2_length': 53.52, 'max_sentence2_length': 252, 'unique_sentence2': 1450}, 'eng-spa': {'num_samples': 35446, 'number_of_characters': 3679410, 'unique_pairs': 35446, 'min_sentence1_length': 10, 'average_sentence1_length': 48.67, 'max_sentence1_length': 525, 'unique_sentence1': 35446, 'min_sentence2_length': 10, 'average_sentence2_length': 55.13, 'max_sentence2_length': 607, 'unique_sentence2': 35446}, 'eng-srp': {'num_samples': 303, 'number_of_characters': 29733, 'unique_pairs': 303, 'min_sentence1_length': 15, 'average_sentence1_length': 48.15, 'max_sentence1_length': 123, 'unique_sentence1': 303, 'min_sentence2_length': 10, 'average_sentence2_length': 49.98, 'max_sentence2_length': 128, 'unique_sentence2': 303}, 'eng-swe': {'num_samples': 6005, 'number_of_characters': 597122, 'unique_pairs': 6005, 'min_sentence1_length': 10, 'average_sentence1_length': 49.21, 'max_sentence1_length': 287, 'unique_sentence1': 6005, 'min_sentence2_length': 10, 'average_sentence2_length': 50.22, 'max_sentence2_length': 289, 'unique_sentence2': 6005}, 'eng-tgl': {'num_samples': 551, 'number_of_characters': 56336, 'unique_pairs': 551, 'min_sentence1_length': 14, 'average_sentence1_length': 45.61, 'max_sentence1_length': 165, 'unique_sentence1': 551, 'min_sentence2_length': 13, 'average_sentence2_length': 56.64, 'max_sentence2_length': 198, 'unique_sentence2': 551}, 'eng-tha': {'num_samples': 814, 'number_of_characters': 79610, 'unique_pairs': 814, 'min_sentence1_length': 11, 'average_sentence1_length': 50.45, 'max_sentence1_length': 544, 'unique_sentence1': 814, 'min_sentence2_length': 11, 'average_sentence2_length': 47.35, 'max_sentence2_length': 511, 'unique_sentence2': 814}, 'eng-tur': {'num_samples': 4606, 'number_of_characters': 446492, 'unique_pairs': 4606, 'min_sentence1_length': 10, 'average_sentence1_length': 47.39, 'max_sentence1_length': 287, 'unique_sentence1': 4606, 'min_sentence2_length': 10, 'average_sentence2_length': 49.54, 'max_sentence2_length': 314, 'unique_sentence2': 4606}, 'eng-ukr': {'num_samples': 3778, 'number_of_characters': 388398, 'unique_pairs': 3778, 'min_sentence1_length': 11, 'average_sentence1_length': 51.14, 'max_sentence1_length': 284, 'unique_sentence1': 3778, 'min_sentence2_length': 10, 'average_sentence2_length': 51.66, 'max_sentence2_length': 255, 'unique_sentence2': 3778}, 'eng-urd': {'num_samples': 268, 'number_of_characters': 25182, 'unique_pairs': 268, 'min_sentence1_length': 14, 'average_sentence1_length': 46.1, 'max_sentence1_length': 99, 'unique_sentence1': 268, 'min_sentence2_length': 15, 'average_sentence2_length': 47.86, 'max_sentence2_length': 107, 'unique_sentence2': 268}, 'eng-vie': {'num_samples': 1264, 'number_of_characters': 123022, 'unique_pairs': 1264, 'min_sentence1_length': 12, 'average_sentence1_length': 45.65, 'max_sentence1_length': 241, 'unique_sentence1': 1264, 'min_sentence2_length': 10, 'average_sentence2_length': 51.68, 'max_sentence2_length': 262, 'unique_sentence2': 1264}, 'eng-zho': {'num_samples': 4959, 'number_of_characters': 347349, 'unique_pairs': 4959, 'min_sentence1_length': 11, 'average_sentence1_length': 50.01, 'max_sentence1_length': 847, 'unique_sentence1': 4959, 'min_sentence2_length': 8, 'average_sentence2_length': 20.04, 'max_sentence2_length': 620, 'unique_sentence2': 4959}}}}
WebFAQRetrieval (Michael Dinzinger, 2025) ['ara', 'aze', 'ben', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'est', 'fas', 'fin', 'fra', 'heb', 'hin', 'hrv', 'hun', 'ind', 'isl', 'ita', 'jpn', 'kat', 'kaz', 'kor', 'lav', 'lit', 'mar', 'msa', 'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tgl', 'tha', 'tur', 'ukr', 'urd', 'uzb', 'vie', 'zho'] Retrieval s2p [Web, Written] {'test': 11434472} {'test': {'number_of_characters': 35239389, 'num_samples': 11434472, 'num_queries': 271168, 'num_documents': 11163304, 'min_document_length': 7, 'average_document_length': 1.16, 'max_document_length': 4317, 'unique_documents': 11163304, 'min_query_length': 2, 'average_query_length': 82.33, 'max_query_length': 2, 'unique_queries': 271168, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 271168, 'hf_subset_descriptive_stats': {'ara': {'number_of_characters': 746242, 'num_samples': 152664, 'num_queries': 10000, 'num_documents': 142664, 'min_document_length': 10, 'average_document_length': 3.23, 'max_document_length': 1503, 'unique_documents': 142664, 'min_query_length': 2, 'average_query_length': 28.53, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'aze': {'number_of_characters': 33357, 'num_samples': 5356, 'num_queries': 487, 'num_documents': 4869, 'min_document_length': 10, 'average_document_length': 4.85, 'max_document_length': 149, 'unique_documents': 4869, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 487, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 487}, 'ben': {'number_of_characters': 92226, 'num_samples': 15747, 'num_queries': 1432, 'num_documents': 14315, 'min_document_length': 10, 'average_document_length': 4.44, 'max_document_length': 322, 'unique_documents': 14315, 'min_query_length': 2, 'average_query_length': 19.99, 'max_query_length': 2, 'unique_queries': 1432, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1432}, 'bul': {'number_of_characters': 248572, 'num_samples': 38208, 'num_queries': 3474, 'num_documents': 34734, 'min_document_length': 10, 'average_document_length': 5.16, 'max_document_length': 396, 'unique_documents': 34734, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 3474, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3474}, 'cat': {'number_of_characters': 93488, 'num_samples': 13964, 'num_queries': 1270, 'num_documents': 12694, 'min_document_length': 10, 'average_document_length': 5.36, 'max_document_length': 184, 'unique_documents': 12694, 'min_query_length': 2, 'average_query_length': 19.99, 'max_query_length': 2, 'unique_queries': 1270, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1270}, 'ces': {'number_of_characters': 481288, 'num_samples': 79539, 'num_queries': 7231, 'num_documents': 72308, 'min_document_length': 9, 'average_document_length': 4.66, 'max_document_length': 994, 'unique_documents': 72308, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 7231, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 7231}, 'dan': {'number_of_characters': 760053, 'num_samples': 147686, 'num_queries': 10000, 'num_documents': 137686, 'min_document_length': 10, 'average_document_length': 3.52, 'max_document_length': 1420, 'unique_documents': 137686, 'min_query_length': 2, 'average_query_length': 27.54, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'deu': {'number_of_characters': 2318297, 'num_samples': 901201, 'num_queries': 10000, 'num_documents': 891201, 'min_document_length': 7, 'average_document_length': 0.6, 'max_document_length': 1191, 'unique_documents': 891201, 'min_query_length': 2, 'average_query_length': 178.24, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'ell': {'number_of_characters': 295282, 'num_samples': 42368, 'num_queries': 3852, 'num_documents': 38516, 'min_document_length': 12, 'average_document_length': 5.67, 'max_document_length': 229, 'unique_documents': 38516, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 3852, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3852}, 'eng': {'number_of_characters': 11040979, 'num_samples': 5288725, 'num_queries': 10000, 'num_documents': 5278725, 'min_document_length': 9, 'average_document_length': 0.09, 'max_document_length': 1177, 'unique_documents': 5278725, 'min_query_length': 2, 'average_query_length': 1055.74, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'est': {'number_of_characters': 90781, 'num_samples': 14181, 'num_queries': 1290, 'num_documents': 12891, 'min_document_length': 10, 'average_document_length': 5.04, 'max_document_length': 177, 'unique_documents': 12891, 'min_query_length': 2, 'average_query_length': 19.99, 'max_query_length': 2, 'unique_queries': 1290, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1290}, 'fas': {'number_of_characters': 914501, 'num_samples': 236940, 'num_queries': 10000, 'num_documents': 226940, 'min_document_length': 10, 'average_document_length': 2.03, 'max_document_length': 3115, 'unique_documents': 226940, 'min_query_length': 2, 'average_query_length': 45.39, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'fin': {'number_of_characters': 500237, 'num_samples': 80902, 'num_queries': 7355, 'num_documents': 73547, 'min_document_length': 9, 'average_document_length': 4.8, 'max_document_length': 294, 'unique_documents': 73547, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 7355, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 7355}, 'fra': {'number_of_characters': 1727184, 'num_samples': 579505, 'num_queries': 10000, 'num_documents': 569505, 'min_document_length': 8, 'average_document_length': 1.03, 'max_document_length': 489, 'unique_documents': 569505, 'min_query_length': 2, 'average_query_length': 113.9, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'heb': {'number_of_characters': 232589, 'num_samples': 42847, 'num_queries': 3896, 'num_documents': 38951, 'min_document_length': 10, 'average_document_length': 3.97, 'max_document_length': 278, 'unique_documents': 38951, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 3896, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3896}, 'hin': {'number_of_characters': 685269, 'num_samples': 110031, 'num_queries': 10000, 'num_documents': 100031, 'min_document_length': 10, 'average_document_length': 4.85, 'max_document_length': 879, 'unique_documents': 100031, 'min_query_length': 2, 'average_query_length': 20.01, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'hrv': {'number_of_characters': 38324, 'num_samples': 6100, 'num_queries': 555, 'num_documents': 5545, 'min_document_length': 11, 'average_document_length': 4.91, 'max_document_length': 132, 'unique_documents': 5545, 'min_query_length': 2, 'average_query_length': 19.98, 'max_query_length': 2, 'unique_queries': 555, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 555}, 'hun': {'number_of_characters': 319732, 'num_samples': 49824, 'num_queries': 4530, 'num_documents': 45294, 'min_document_length': 10, 'average_document_length': 5.06, 'max_document_length': 575, 'unique_documents': 45294, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 4530, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 4530}, 'ind': {'number_of_characters': 768010, 'num_samples': 121315, 'num_queries': 10000, 'num_documents': 111315, 'min_document_length': 8, 'average_document_length': 4.9, 'max_document_length': 850, 'unique_documents': 111315, 'min_query_length': 2, 'average_query_length': 22.26, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'isl': {'number_of_characters': 37490, 'num_samples': 5256, 'num_queries': 478, 'num_documents': 4778, 'min_document_length': 12, 'average_document_length': 5.85, 'max_document_length': 149, 'unique_documents': 4778, 'min_query_length': 2, 'average_query_length': 19.99, 'max_query_length': 2, 'unique_queries': 478, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 478}, 'ita': {'number_of_characters': 1094898, 'num_samples': 267803, 'num_queries': 10000, 'num_documents': 257803, 'min_document_length': 10, 'average_document_length': 2.25, 'max_document_length': 905, 'unique_documents': 257803, 'min_query_length': 2, 'average_query_length': 51.56, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'jpn': {'number_of_characters': 882722, 'num_samples': 319157, 'num_queries': 10000, 'num_documents': 309157, 'min_document_length': 10, 'average_document_length': 0.86, 'max_document_length': 645, 'unique_documents': 309157, 'min_query_length': 2, 'average_query_length': 61.83, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'kat': {'number_of_characters': 16624, 'num_samples': 2646, 'num_queries': 241, 'num_documents': 2405, 'min_document_length': 13, 'average_document_length': 4.91, 'max_document_length': 125, 'unique_documents': 2405, 'min_query_length': 2, 'average_query_length': 19.96, 'max_query_length': 2, 'unique_queries': 241, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 241}, 'kaz': {'number_of_characters': 21230, 'num_samples': 3295, 'num_queries': 300, 'num_documents': 2995, 'min_document_length': 16, 'average_document_length': 5.09, 'max_document_length': 260, 'unique_documents': 2995, 'min_query_length': 2, 'average_query_length': 19.97, 'max_query_length': 2, 'unique_queries': 300, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 300}, 'kor': {'number_of_characters': 481069, 'num_samples': 112000, 'num_queries': 10000, 'num_documents': 102000, 'min_document_length': 10, 'average_document_length': 2.72, 'max_document_length': 264, 'unique_documents': 102000, 'min_query_length': 2, 'average_query_length': 20.4, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'lav': {'number_of_characters': 89318, 'num_samples': 14429, 'num_queries': 1312, 'num_documents': 13117, 'min_document_length': 10, 'average_document_length': 4.81, 'max_document_length': 184, 'unique_documents': 13117, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 1312, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1312}, 'lit': {'number_of_characters': 126055, 'num_samples': 20200, 'num_queries': 1837, 'num_documents': 18363, 'min_document_length': 9, 'average_document_length': 4.86, 'max_document_length': 420, 'unique_documents': 18363, 'min_query_length': 2, 'average_query_length': 19.99, 'max_query_length': 2, 'unique_queries': 1837, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1837}, 'mar': {'number_of_characters': 46162, 'num_samples': 8145, 'num_queries': 741, 'num_documents': 7404, 'min_document_length': 14, 'average_document_length': 4.23, 'max_document_length': 184, 'unique_documents': 7404, 'min_query_length': 2, 'average_query_length': 19.98, 'max_query_length': 2, 'unique_queries': 741, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 741}, 'msa': {'number_of_characters': 50848, 'num_samples': 7072, 'num_queries': 643, 'num_documents': 6429, 'min_document_length': 11, 'average_document_length': 5.91, 'max_document_length': 189, 'unique_documents': 6429, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 643, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 643}, 'nld': {'number_of_characters': 1238345, 'num_samples': 380662, 'num_queries': 10000, 'num_documents': 370662, 'min_document_length': 9, 'average_document_length': 1.34, 'max_document_length': 1603, 'unique_documents': 370662, 'min_query_length': 2, 'average_query_length': 74.13, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'nor': {'number_of_characters': 433342, 'num_samples': 69556, 'num_queries': 6324, 'num_documents': 63232, 'min_document_length': 7, 'average_document_length': 4.85, 'max_document_length': 227, 'unique_documents': 63232, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 6324, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 6324}, 'pol': {'number_of_characters': 902187, 'num_samples': 202515, 'num_queries': 10000, 'num_documents': 192515, 'min_document_length': 10, 'average_document_length': 2.69, 'max_document_length': 1038, 'unique_documents': 192515, 'min_query_length': 2, 'average_query_length': 38.5, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'por': {'number_of_characters': 918403, 'num_samples': 219353, 'num_queries': 10000, 'num_documents': 209353, 'min_document_length': 10, 'average_document_length': 2.39, 'max_document_length': 1154, 'unique_documents': 209353, 'min_query_length': 2, 'average_query_length': 41.87, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'ron': {'number_of_characters': 440199, 'num_samples': 65882, 'num_queries': 5990, 'num_documents': 59892, 'min_document_length': 10, 'average_document_length': 5.35, 'max_document_length': 1176, 'unique_documents': 59892, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 5990, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 5990}, 'rus': {'number_of_characters': 1328024, 'num_samples': 397504, 'num_queries': 10000, 'num_documents': 387504, 'min_document_length': 10, 'average_document_length': 1.43, 'max_document_length': 1937, 'unique_documents': 387504, 'min_query_length': 2, 'average_query_length': 77.5, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'slk': {'number_of_characters': 229346, 'num_samples': 34675, 'num_queries': 3153, 'num_documents': 31522, 'min_document_length': 10, 'average_document_length': 5.28, 'max_document_length': 4317, 'unique_documents': 31522, 'min_query_length': 2, 'average_query_length': 19.99, 'max_query_length': 2, 'unique_queries': 3153, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3153}, 'slv': {'number_of_characters': 114951, 'num_samples': 17785, 'num_queries': 1617, 'num_documents': 16168, 'min_document_length': 11, 'average_document_length': 5.11, 'max_document_length': 426, 'unique_documents': 16168, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 1617, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 1617}, 'spa': {'number_of_characters': 1758740, 'num_samples': 614661, 'num_queries': 10000, 'num_documents': 604661, 'min_document_length': 10, 'average_document_length': 0.91, 'max_document_length': 559, 'unique_documents': 604661, 'min_query_length': 2, 'average_query_length': 120.93, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'sqi': {'number_of_characters': 15784, 'num_samples': 2285, 'num_queries': 208, 'num_documents': 2077, 'min_document_length': 11, 'average_document_length': 5.6, 'max_document_length': 141, 'unique_documents': 2077, 'min_query_length': 2, 'average_query_length': 19.97, 'max_query_length': 2, 'unique_queries': 208, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 208}, 'srp': {'number_of_characters': 40489, 'num_samples': 6407, 'num_queries': 583, 'num_documents': 5824, 'min_document_length': 8, 'average_document_length': 4.95, 'max_document_length': 128, 'unique_documents': 5824, 'min_query_length': 2, 'average_query_length': 19.98, 'max_query_length': 2, 'unique_queries': 583, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 583}, 'swe': {'number_of_characters': 779495, 'num_samples': 168738, 'num_queries': 10000, 'num_documents': 158738, 'min_document_length': 10, 'average_document_length': 2.91, 'max_document_length': 257, 'unique_documents': 158738, 'min_query_length': 2, 'average_query_length': 31.75, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'tgl': {'number_of_characters': 27050, 'num_samples': 4067, 'num_queries': 370, 'num_documents': 3697, 'min_document_length': 13, 'average_document_length': 5.32, 'max_document_length': 198, 'unique_documents': 3697, 'min_query_length': 2, 'average_query_length': 19.98, 'max_query_length': 2, 'unique_queries': 370, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 370}, 'tha': {'number_of_characters': 311898, 'num_samples': 52172, 'num_queries': 4743, 'num_documents': 47429, 'min_document_length': 10, 'average_document_length': 4.58, 'max_document_length': 472, 'unique_documents': 47429, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 4743, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 4743}, 'tur': {'number_of_characters': 706899, 'num_samples': 154846, 'num_queries': 10000, 'num_documents': 144846, 'min_document_length': 7, 'average_document_length': 2.88, 'max_document_length': 703, 'unique_documents': 144846, 'min_query_length': 2, 'average_query_length': 28.97, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'ukr': {'number_of_characters': 478281, 'num_samples': 75352, 'num_queries': 6851, 'num_documents': 68501, 'min_document_length': 10, 'average_document_length': 4.98, 'max_document_length': 1015, 'unique_documents': 68501, 'min_query_length': 2, 'average_query_length': 20.0, 'max_query_length': 2, 'unique_queries': 6851, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 6851}, 'urd': {'number_of_characters': 18987, 'num_samples': 3053, 'num_queries': 278, 'num_documents': 2775, 'min_document_length': 19, 'average_document_length': 4.84, 'max_document_length': 108, 'unique_documents': 2775, 'min_query_length': 2, 'average_query_length': 19.96, 'max_query_length': 2, 'unique_queries': 278, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 278}, 'uzb': {'number_of_characters': 8789, 'num_samples': 1390, 'num_queries': 127, 'num_documents': 1263, 'min_document_length': 16, 'average_document_length': 4.96, 'max_document_length': 110, 'unique_documents': 1263, 'min_query_length': 2, 'average_query_length': 19.89, 'max_query_length': 2, 'unique_queries': 127, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 127}, 'vie': {'number_of_characters': 787166, 'num_samples': 133972, 'num_queries': 10000, 'num_documents': 123972, 'min_document_length': 9, 'average_document_length': 4.35, 'max_document_length': 1061, 'unique_documents': 123972, 'min_query_length': 2, 'average_query_length': 24.79, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}, 'zho': {'number_of_characters': 468187, 'num_samples': 142491, 'num_queries': 10000, 'num_documents': 132491, 'min_document_length': 7, 'average_document_length': 1.53, 'max_document_length': 671, 'unique_documents': 132491, 'min_query_length': 2, 'average_query_length': 26.5, 'max_query_length': 2, 'unique_queries': 10000, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 10000}}}}
WebLINXCandidatesReranking (Xing Han Lù, 2024) ['eng'] Reranking p2p [Academic, Web, Written] None None
WebQAT2ITRetrieval (Chang et al., 2022) ['eng'] Any2AnyRetrieval t2it [Encyclopaedic] {'test': 405707} {'test': {'number_of_characters': 34758630, 'num_samples': 405707, 'num_queries': 2511, 'num_documents': 403196, 'min_document_length': 2, 'average_document_length': 85.62, 'max_document_length': 1342, 'unique_documents': 391293, 'num_document_images': 403196, 'min_query_length': 24, 'average_query_length': 93.59, 'max_query_length': 292, 'unique_queries': 2511, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.44, 'max_relevant_docs_per_query': 2, 'unique_relevant_docs': 3353}}
WebQAT2TRetrieval (Chang et al., 2022) ['eng'] Any2AnyRetrieval t2t [Encyclopaedic] {'test': 546912} {'test': {'number_of_characters': 126324224, 'num_samples': 546912, 'num_queries': 2455, 'num_documents': 544457, 'min_document_length': 57, 'average_document_length': 231.52, 'max_document_length': 959, 'unique_documents': 544457, 'num_document_images': 0, 'min_query_length': 26, 'average_query_length': 109.95, 'max_query_length': 432, 'unique_queries': 2455, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 2.04, 'max_relevant_docs_per_query': 4, 'unique_relevant_docs': 4862}}
WikiCitiesClustering ['eng'] Clustering p2p [Encyclopaedic, Written] None None
WikiClusteringP2P.v2 ['bos', 'cat', 'ces', 'dan', 'eus', 'glv', 'ilo', 'kur', 'lav', 'min', 'mlt', 'sco', 'sqi', 'wln'] Clustering p2p [Encyclopaedic, Written] None None
WikipediaBioMetChemClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaBiolumNeurochemClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaChemEngSpecialtiesClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaChemFieldsClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaChemistryTopicsClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaChemistryTopicsClustering (Kasmaee et al., 2024) ['eng'] Clustering s2p [Chemistry] None None
WikipediaCompChemSpectroscopyClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaCryobiologySeparationClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaCrystallographyAnalyticalClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaGreenhouseEnantiopureClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaIsotopesFissionClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaLuminescenceClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaOrganicInorganicClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaRerankingMultilingual ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] Reranking s2p [Encyclopaedic, Written] {'test': 24000} {'test': {'num_samples': 24000, 'number_of_characters': 83866932, 'num_positive': 24000, 'num_negative': 192000, 'min_query_length': 7, 'avg_query_length': 59.09, 'max_query_length': 180, 'unique_query': 23997, 'min_positive_length': 100, 'avg_positive_length': 385.45, 'max_positive_length': 3515, 'unique_positive': 23993, 'min_negative_length': 100, 'avg_negative_length': 381.24, 'max_negative_length': 9461, 'unique_negative': 191783, 'hf_subset_descriptive_stats': {'bg': {'num_samples': 1500, 'number_of_characters': 5145316, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 60.83, 'max_query_length': 166, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 375.89, 'max_positive_length': 2241, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 374.19, 'max_negative_length': 4869, 'unique_negative': 11996}, 'bn': {'num_samples': 1500, 'number_of_characters': 5390581, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 7, 'avg_query_length': 47.27, 'max_query_length': 123, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.59, 'max_positive_length': 2338, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 393.98, 'max_negative_length': 5104, 'unique_negative': 11996}, 'cs': {'num_samples': 1500, 'number_of_characters': 5079180, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.27, 'max_query_length': 137, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 383.84, 'max_positive_length': 2300, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 368.25, 'max_negative_length': 3487, 'unique_negative': 11982}, 'da': {'num_samples': 1500, 'number_of_characters': 4746132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 56.75, 'max_query_length': 137, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 351.68, 'max_positive_length': 2159, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.46, 'max_negative_length': 2563, 'unique_negative': 11972}, 'de': {'num_samples': 1500, 'number_of_characters': 5483592, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 20, 'avg_query_length': 70.0, 'max_query_length': 180, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 391.54, 'max_positive_length': 2674, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 399.27, 'max_negative_length': 3083, 'unique_negative': 12000}, 'en': {'num_samples': 1500, 'number_of_characters': 6217884, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 68.37, 'max_query_length': 162, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 451.73, 'max_positive_length': 3515, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 453.14, 'max_negative_length': 3662, 'unique_negative': 12000}, 'fa': {'num_samples': 1500, 'number_of_characters': 4732619, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 12, 'avg_query_length': 48.67, 'max_query_length': 119, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 347.7, 'max_positive_length': 2571, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 344.84, 'max_negative_length': 4707, 'unique_negative': 11978}, 'fi': {'num_samples': 1500, 'number_of_characters': 5209132, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.34, 'max_query_length': 132, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 394.71, 'max_positive_length': 2129, 'unique_positive': 1498, 'min_negative_length': 100, 'avg_negative_length': 377.84, 'max_negative_length': 2574, 'unique_negative': 11972}, 'hi': {'num_samples': 1500, 'number_of_characters': 5620959, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 13, 'avg_query_length': 50.78, 'max_query_length': 125, 'unique_query': 1499, 'min_positive_length': 100, 'avg_positive_length': 420.38, 'max_positive_length': 2361, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 409.52, 'max_negative_length': 5912, 'unique_negative': 11996}, 'it': {'num_samples': 1500, 'number_of_characters': 5420496, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 23, 'avg_query_length': 70.05, 'max_query_length': 156, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 396.97, 'max_positive_length': 2082, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 393.33, 'max_negative_length': 9461, 'unique_negative': 11993}, 'nl': {'num_samples': 1500, 'number_of_characters': 5169556, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.34, 'max_query_length': 136, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 380.79, 'max_positive_length': 1864, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 375.03, 'max_negative_length': 3641, 'unique_negative': 11985}, 'pt': {'num_samples': 1500, 'number_of_characters': 5474356, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 18, 'avg_query_length': 65.12, 'max_query_length': 176, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 404.02, 'max_positive_length': 3057, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 397.55, 'max_negative_length': 2877, 'unique_negative': 11991}, 'ro': {'num_samples': 1500, 'number_of_characters': 4796113, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 61.97, 'max_query_length': 169, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 346.71, 'max_positive_length': 1917, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 348.59, 'max_negative_length': 4213, 'unique_negative': 11971}, 'sr': {'num_samples': 1500, 'number_of_characters': 5271732, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 15, 'avg_query_length': 55.67, 'max_query_length': 146, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 386.35, 'max_positive_length': 2421, 'unique_positive': 1499, 'min_negative_length': 100, 'avg_negative_length': 384.06, 'max_negative_length': 3668, 'unique_negative': 11974}, 'no': {'num_samples': 1500, 'number_of_characters': 5036586, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 14, 'avg_query_length': 55.29, 'max_query_length': 129, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 367.72, 'max_positive_length': 1450, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 366.84, 'max_negative_length': 2841, 'unique_negative': 11996}, 'sv': {'num_samples': 1500, 'number_of_characters': 5072698, 'num_positive': 1500, 'num_negative': 12000, 'min_query_length': 17, 'avg_query_length': 57.73, 'max_query_length': 133, 'unique_query': 1500, 'min_positive_length': 100, 'avg_positive_length': 372.59, 'max_positive_length': 2493, 'unique_positive': 1500, 'min_negative_length': 100, 'avg_negative_length': 368.94, 'max_negative_length': 3680, 'unique_negative': 11999}}}}
WikipediaRetrievalMultilingual ['ben', 'bul', 'ces', 'dan', 'deu', 'eng', 'fas', 'fin', 'hin', 'ita', 'nld', 'nor', 'por', 'ron', 'srp', 'swe'] Retrieval s2p [Encyclopaedic, Written] None None
WikipediaSaltsSemiconductorsClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaSolidStateColloidalClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WikipediaSpecialtiesInChemistryClustering (Kasmaee et al., 2024) ['eng'] Clustering s2p [Chemistry] None None
WikipediaTheoreticalAppliedClassification (Kasmaee et al., 2024) ['eng'] Classification s2s [Chemistry] None None
WinoGrande (Xiao et al., 2024) ['eng'] Retrieval s2s [Encyclopaedic, Written] None None
Winoground (Tristan Thrush, 2022) ['eng'] Compositionality i2t [Social] {'test': 400} {'test': {'num_samples': 400, 'num_images': 800, 'num_texts': 800, 'num_unique_texts': 800, 'min_text_length': 8, 'average_text_length': 45.47, 'max_text_length': 151}}
WisesightSentimentClassification ['tha'] Classification s2s [News, Social, Written] None None
XFlickr30kCoT2IRetrieval (Bugliarello et al., 2022) ['deu', 'eng', 'ind', 'jpn', 'rus', 'spa', 'tur', 'zho'] Any2AnyMultilingualRetrieval t2i [Encyclopaedic, Written] {'test': 32000} {'test': {'number_of_characters': 1149877, 'num_samples': 32000, 'num_queries': 16000, 'num_documents': 16000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 16000, 'min_query_length': 12, 'average_query_length': 71.87, 'max_query_length': 385, 'unique_queries': 15987, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 16000, 'hf_subset_descriptive_stats': {'de': {'number_of_characters': 132154, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 4, 'average_query_length': 66.08, 'max_query_length': 220, 'unique_queries': 1994, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}, 'en': {'number_of_characters': 153801, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 34, 'average_query_length': 76.9, 'max_query_length': 377, 'unique_queries': 2000, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}, 'es': {'number_of_characters': 160049, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 23, 'average_query_length': 80.02, 'max_query_length': 342, 'unique_queries': 2000, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}, 'id': {'number_of_characters': 167858, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 4, 'average_query_length': 83.93, 'max_query_length': 211, 'unique_queries': 2000, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}, 'ja': {'number_of_characters': 75480, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 9, 'average_query_length': 37.74, 'max_query_length': 179, 'unique_queries': 2000, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}, 'ru': {'number_of_characters': 149947, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 10, 'average_query_length': 74.97, 'max_query_length': 294, 'unique_queries': 1997, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}, 'tr': {'number_of_characters': 136134, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 19, 'average_query_length': 68.07, 'max_query_length': 199, 'unique_queries': 1997, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}, 'zh': {'number_of_characters': 46454, 'num_samples': 4000, 'num_queries': 2000, 'num_documents': 2000, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 2000, 'min_query_length': 10, 'average_query_length': 23.23, 'max_query_length': 66, 'unique_queries': 1999, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 2000}}}}
XM3600T2IRetrieval (Thapliyal et al., 2022) ['ara', 'ben', 'ces', 'dan', 'deu', 'ell', 'eng', 'fas', 'fil', 'fin', 'fra', 'heb', 'hin', 'hrv', 'hun', 'ind', 'ita', 'jpn', 'kor', 'mri', 'nld', 'nor', 'pol', 'por', 'quz', 'ron', 'rus', 'spa', 'swa', 'swe', 'tel', 'tha', 'tur', 'ukr', 'vie', 'zho'] Any2AnyMultilingualRetrieval t2i [Encyclopaedic, Written] {'test': 390975} {'test': {'number_of_characters': 17009034, 'num_samples': 390975, 'num_queries': 261375, 'num_documents': 129600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 129600, 'min_query_length': 9, 'average_query_length': 65.08, 'max_query_length': 532, 'unique_queries': 259932, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 129600, 'hf_subset_descriptive_stats': {'ar': {'number_of_characters': 310802, 'num_samples': 10967, 'num_queries': 7367, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 8, 'average_query_length': 42.19, 'max_query_length': 208, 'unique_queries': 7339, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'bn': {'number_of_characters': 223622, 'num_samples': 7200, 'num_queries': 3600, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 28, 'average_query_length': 62.12, 'max_query_length': 139, 'unique_queries': 3594, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'cs': {'number_of_characters': 282069, 'num_samples': 10807, 'num_queries': 7207, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 3, 'average_query_length': 39.14, 'max_query_length': 266, 'unique_queries': 6814, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'da': {'number_of_characters': 351028, 'num_samples': 10864, 'num_queries': 7264, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 7, 'average_query_length': 48.32, 'max_query_length': 158, 'unique_queries': 7246, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'de': {'number_of_characters': 660790, 'num_samples': 12243, 'num_queries': 8643, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 11, 'average_query_length': 76.45, 'max_query_length': 334, 'unique_queries': 8643, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'el': {'number_of_characters': 370363, 'num_samples': 10804, 'num_queries': 7204, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 4, 'average_query_length': 51.41, 'max_query_length': 262, 'unique_queries': 7100, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'en': {'number_of_characters': 356488, 'num_samples': 10800, 'num_queries': 7200, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 6, 'average_query_length': 49.51, 'max_query_length': 148, 'unique_queries': 7129, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'es': {'number_of_characters': 485004, 'num_samples': 12214, 'num_queries': 8614, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 12, 'average_query_length': 56.3, 'max_query_length': 179, 'unique_queries': 8605, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'fa': {'number_of_characters': 430055, 'num_samples': 10845, 'num_queries': 7245, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 4, 'average_query_length': 59.36, 'max_query_length': 289, 'unique_queries': 7242, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'fi': {'number_of_characters': 464334, 'num_samples': 10727, 'num_queries': 7127, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 1, 'average_query_length': 65.15, 'max_query_length': 336, 'unique_queries': 7110, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'fil': {'number_of_characters': 480287, 'num_samples': 10709, 'num_queries': 7109, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 4, 'average_query_length': 67.56, 'max_query_length': 332, 'unique_queries': 7016, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'fr': {'number_of_characters': 595836, 'num_samples': 12162, 'num_queries': 8562, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 15, 'average_query_length': 69.59, 'max_query_length': 173, 'unique_queries': 8560, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'he': {'number_of_characters': 457775, 'num_samples': 10800, 'num_queries': 7200, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 3, 'average_query_length': 63.58, 'max_query_length': 453, 'unique_queries': 7190, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'hi': {'number_of_characters': 509092, 'num_samples': 12103, 'num_queries': 8503, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 3, 'average_query_length': 59.87, 'max_query_length': 188, 'unique_queries': 8422, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'hr': {'number_of_characters': 420595, 'num_samples': 10880, 'num_queries': 7280, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 3, 'average_query_length': 57.77, 'max_query_length': 271, 'unique_queries': 7224, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'hu': {'number_of_characters': 436677, 'num_samples': 10816, 'num_queries': 7216, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 5, 'average_query_length': 60.52, 'max_query_length': 393, 'unique_queries': 7209, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'id': {'number_of_characters': 666387, 'num_samples': 10726, 'num_queries': 7126, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 16, 'average_query_length': 93.51, 'max_query_length': 286, 'unique_queries': 7125, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'it': {'number_of_characters': 608604, 'num_samples': 12071, 'num_queries': 8471, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 15, 'average_query_length': 71.85, 'max_query_length': 201, 'unique_queries': 8470, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'ja': {'number_of_characters': 186672, 'num_samples': 10785, 'num_queries': 7185, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 1, 'average_query_length': 25.98, 'max_query_length': 97, 'unique_queries': 7175, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'ko': {'number_of_characters': 188812, 'num_samples': 11250, 'num_queries': 7650, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 1, 'average_query_length': 24.68, 'max_query_length': 113, 'unique_queries': 7644, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'mi': {'number_of_characters': 262800, 'num_samples': 8332, 'num_queries': 4732, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 7, 'average_query_length': 55.54, 'max_query_length': 304, 'unique_queries': 4707, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'nl': {'number_of_characters': 370231, 'num_samples': 11659, 'num_queries': 8059, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 4, 'average_query_length': 45.94, 'max_query_length': 173, 'unique_queries': 8004, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'no': {'number_of_characters': 391381, 'num_samples': 10813, 'num_queries': 7213, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 5, 'average_query_length': 54.26, 'max_query_length': 162, 'unique_queries': 7191, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'pl': {'number_of_characters': 411189, 'num_samples': 10741, 'num_queries': 7141, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 4, 'average_query_length': 57.58, 'max_query_length': 226, 'unique_queries': 7117, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'pt': {'number_of_characters': 446873, 'num_samples': 10843, 'num_queries': 7243, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 10, 'average_query_length': 61.7, 'max_query_length': 324, 'unique_queries': 7220, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'quz': {'number_of_characters': 278263, 'num_samples': 10800, 'num_queries': 7200, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 2, 'average_query_length': 38.65, 'max_query_length': 234, 'unique_queries': 7130, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'ro': {'number_of_characters': 629977, 'num_samples': 10723, 'num_queries': 7123, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 5, 'average_query_length': 88.44, 'max_query_length': 524, 'unique_queries': 7122, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'ru': {'number_of_characters': 477558, 'num_samples': 10800, 'num_queries': 7200, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 11, 'average_query_length': 66.33, 'max_query_length': 232, 'unique_queries': 7194, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'sv': {'number_of_characters': 339400, 'num_samples': 10873, 'num_queries': 7273, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 6, 'average_query_length': 46.67, 'max_query_length': 174, 'unique_queries': 7199, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'sw': {'number_of_characters': 444085, 'num_samples': 10646, 'num_queries': 7046, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 2, 'average_query_length': 63.03, 'max_query_length': 299, 'unique_queries': 7014, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'te': {'number_of_characters': 341340, 'num_samples': 10800, 'num_queries': 7200, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 16, 'average_query_length': 47.41, 'max_query_length': 132, 'unique_queries': 7062, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'th': {'number_of_characters': 344730, 'num_samples': 10800, 'num_queries': 7200, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 8, 'average_query_length': 47.88, 'max_query_length': 147, 'unique_queries': 7170, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'tr': {'number_of_characters': 458639, 'num_samples': 10833, 'num_queries': 7233, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 7, 'average_query_length': 63.41, 'max_query_length': 453, 'unique_queries': 7224, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'uk': {'number_of_characters': 474311, 'num_samples': 10815, 'num_queries': 7215, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 10, 'average_query_length': 65.74, 'max_query_length': 372, 'unique_queries': 7206, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'vi': {'number_of_characters': 582546, 'num_samples': 10950, 'num_queries': 7350, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 6, 'average_query_length': 79.26, 'max_query_length': 287, 'unique_queries': 7350, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}, 'zh': {'number_of_characters': 165110, 'num_samples': 10774, 'num_queries': 7174, 'num_documents': 3600, 'min_document_length': 0, 'average_document_length': 0, 'max_document_length': 0, 'unique_documents': 0, 'num_document_images': 3600, 'min_query_length': 1, 'average_query_length': 23.02, 'max_query_length': 96, 'unique_queries': 7165, 'num_query_images': 0, 'min_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 1.0, 'max_relevant_docs_per_query': 1, 'unique_relevant_docs': 3600}}}}
XMarket (Bonab et al., 2021) ['deu', 'eng', 'spa'] Retrieval s2p None None
XNLI (Conneau et al., 2018) ['ara', 'bul', 'deu', 'ell', 'eng', 'fra', 'hin', 'rus', 'spa', 'swa', 'tha', 'tur', 'vie', 'zho'] PairClassification s2s [Fiction, Government, Non-fiction, Written] {'test': 19110, 'validation': 19110} {'test': {'num_samples': 19110, 'number_of_characters': 2907145, 'min_sentence1_length': 3, 'avg_sentence1_length': 103.24, 'max_sentence1_length': 401, 'unique_sentence1': 15328, 'min_sentence2_length': 2, 'avg_sentence2_length': 48.89, 'max_sentence2_length': 187, 'unique_sentence2': 19104, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 179591, 'min_sentence1_length': 11, 'avg_sentence1_length': 89.57, 'max_sentence1_length': 242, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 41.99, 'max_sentence2_length': 115, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 220646, 'min_sentence1_length': 14, 'avg_sentence1_length': 110.02, 'max_sentence1_length': 303, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 51.63, 'max_sentence2_length': 150, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241224, 'min_sentence1_length': 3, 'avg_sentence1_length': 119.93, 'max_sentence1_length': 301, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 56.79, 'max_sentence2_length': 187, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 240222, 'min_sentence1_length': 13, 'avg_sentence1_length': 119.05, 'max_sentence1_length': 344, 'unique_sentence1': 1095, 'min_sentence2_length': 13, 'avg_sentence2_length': 56.93, 'max_sentence2_length': 172, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212223, 'min_sentence1_length': 19, 'avg_sentence1_length': 105.67, 'max_sentence1_length': 268, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 49.8, 'max_sentence2_length': 137, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232207, 'min_sentence1_length': 11, 'avg_sentence1_length': 115.43, 'max_sentence1_length': 385, 'unique_sentence1': 1094, 'min_sentence2_length': 8, 'avg_sentence2_length': 54.68, 'max_sentence2_length': 163, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 245259, 'min_sentence1_length': 9, 'avg_sentence1_length': 121.1, 'max_sentence1_length': 327, 'unique_sentence1': 1095, 'min_sentence2_length': 10, 'avg_sentence2_length': 58.58, 'max_sentence2_length': 169, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 211312, 'min_sentence1_length': 16, 'avg_sentence1_length': 104.63, 'max_sentence1_length': 401, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 50.17, 'max_sentence2_length': 162, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 222797, 'min_sentence1_length': 11, 'avg_sentence1_length': 110.77, 'max_sentence1_length': 306, 'unique_sentence1': 1095, 'min_sentence2_length': 8, 'avg_sentence2_length': 52.45, 'max_sentence2_length': 167, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210103, 'min_sentence1_length': 10, 'avg_sentence1_length': 104.44, 'max_sentence1_length': 266, 'unique_sentence1': 1094, 'min_sentence2_length': 2, 'avg_sentence2_length': 49.48, 'max_sentence2_length': 146, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192788, 'min_sentence1_length': 12, 'avg_sentence1_length': 96.69, 'max_sentence1_length': 262, 'unique_sentence1': 1095, 'min_sentence2_length': 6, 'avg_sentence2_length': 44.54, 'max_sentence2_length': 129, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208658, 'min_sentence1_length': 15, 'avg_sentence1_length': 103.68, 'max_sentence1_length': 255, 'unique_sentence1': 1095, 'min_sentence2_length': 6, 'avg_sentence2_length': 49.19, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 223549, 'min_sentence1_length': 14, 'avg_sentence1_length': 111.31, 'max_sentence1_length': 265, 'unique_sentence1': 1095, 'min_sentence2_length': 9, 'avg_sentence2_length': 52.46, 'max_sentence2_length': 143, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 66566, 'min_sentence1_length': 4, 'avg_sentence1_length': 33.04, 'max_sentence1_length': 112, 'unique_sentence1': 1095, 'min_sentence2_length': 3, 'avg_sentence2_length': 15.73, 'max_sentence2_length': 59, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}, 'validation': {'num_samples': 19110, 'number_of_characters': 2909058, 'min_sentence1_length': 5, 'avg_sentence1_length': 103.21, 'max_sentence1_length': 323, 'unique_sentence1': 11171, 'min_sentence2_length': 3, 'avg_sentence2_length': 49.02, 'max_sentence2_length': 172, 'unique_sentence2': 19101, 'unique_labels': 2, 'labels': {'0': {'count': 9562}, '1': {'count': 9548}}, 'hf_subset_descriptive_stats': {'ar': {'num_samples': 1365, 'number_of_characters': 177355, 'min_sentence1_length': 13, 'avg_sentence1_length': 88.32, 'max_sentence1_length': 214, 'unique_sentence1': 798, 'min_sentence2_length': 6, 'avg_sentence2_length': 41.61, 'max_sentence2_length': 137, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'bg': {'num_samples': 1365, 'number_of_characters': 219988, 'min_sentence1_length': 16, 'avg_sentence1_length': 109.2, 'max_sentence1_length': 316, 'unique_sentence1': 798, 'min_sentence2_length': 10, 'avg_sentence2_length': 51.97, 'max_sentence2_length': 151, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'de': {'num_samples': 1365, 'number_of_characters': 241852, 'min_sentence1_length': 20, 'avg_sentence1_length': 119.81, 'max_sentence1_length': 298, 'unique_sentence1': 798, 'min_sentence2_length': 12, 'avg_sentence2_length': 57.37, 'max_sentence2_length': 162, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'el': {'num_samples': 1365, 'number_of_characters': 241275, 'min_sentence1_length': 16, 'avg_sentence1_length': 119.88, 'max_sentence1_length': 302, 'unique_sentence1': 798, 'min_sentence2_length': 6, 'avg_sentence2_length': 56.88, 'max_sentence2_length': 171, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'en': {'num_samples': 1365, 'number_of_characters': 212384, 'min_sentence1_length': 20, 'avg_sentence1_length': 105.72, 'max_sentence1_length': 271, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 49.88, 'max_sentence2_length': 139, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'es': {'num_samples': 1365, 'number_of_characters': 232451, 'min_sentence1_length': 14, 'avg_sentence1_length': 115.17, 'max_sentence1_length': 265, 'unique_sentence1': 798, 'min_sentence2_length': 7, 'avg_sentence2_length': 55.12, 'max_sentence2_length': 148, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'fr': {'num_samples': 1365, 'number_of_characters': 246857, 'min_sentence1_length': 19, 'avg_sentence1_length': 121.76, 'max_sentence1_length': 323, 'unique_sentence1': 798, 'min_sentence2_length': 11, 'avg_sentence2_length': 59.09, 'max_sentence2_length': 172, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'hi': {'num_samples': 1365, 'number_of_characters': 212269, 'min_sentence1_length': 18, 'avg_sentence1_length': 105.06, 'max_sentence1_length': 277, 'unique_sentence1': 798, 'min_sentence2_length': 7, 'avg_sentence2_length': 50.44, 'max_sentence2_length': 152, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'ru': {'num_samples': 1365, 'number_of_characters': 221152, 'min_sentence1_length': 15, 'avg_sentence1_length': 109.75, 'max_sentence1_length': 310, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 52.27, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'sw': {'num_samples': 1365, 'number_of_characters': 210482, 'min_sentence1_length': 13, 'avg_sentence1_length': 104.32, 'max_sentence1_length': 264, 'unique_sentence1': 798, 'min_sentence2_length': 8, 'avg_sentence2_length': 49.88, 'max_sentence2_length': 153, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'th': {'num_samples': 1365, 'number_of_characters': 192640, 'min_sentence1_length': 7, 'avg_sentence1_length': 97.28, 'max_sentence1_length': 255, 'unique_sentence1': 798, 'min_sentence2_length': 3, 'avg_sentence2_length': 43.84, 'max_sentence2_length': 140, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'tr': {'num_samples': 1365, 'number_of_characters': 208305, 'min_sentence1_length': 15, 'avg_sentence1_length': 102.97, 'max_sentence1_length': 269, 'unique_sentence1': 798, 'min_sentence2_length': 10, 'avg_sentence2_length': 49.64, 'max_sentence2_length': 139, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'vi': {'num_samples': 1365, 'number_of_characters': 224811, 'min_sentence1_length': 18, 'avg_sentence1_length': 112.26, 'max_sentence1_length': 323, 'unique_sentence1': 798, 'min_sentence2_length': 9, 'avg_sentence2_length': 52.43, 'max_sentence2_length': 159, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}, 'zh': {'num_samples': 1365, 'number_of_characters': 67237, 'min_sentence1_length': 5, 'avg_sentence1_length': 33.41, 'max_sentence1_length': 135, 'unique_sentence1': 798, 'min_sentence2_length': 3, 'avg_sentence2_length': 15.85, 'max_sentence2_length': 66, 'unique_sentence2': 1365, 'unique_labels': 2, 'labels': {'0': {'count': 683}, '1': {'count': 682}}}}}}
XNLIV2 (Upadhyay et al., 2023) ['asm', 'ben', 'bho', 'ell', 'guj', 'kan', 'mar', 'ory', 'pan', 'rus', 'san', 'tam', 'tur'] PairClassification s2s [Fiction, Government, Non-fiction, Written] None None
XPQARetrieval (Shen et al., 2023) ['ara', 'cmn', 'deu', 'eng', 'fra', 'hin', 'ita', 'jpn', 'kor', 'pol', 'por', 'spa', 'tam'] Retrieval s2p [Reviews, Written] None None
XQuADRetrieval (Mikel Artetxe, 2019) ['arb', 'deu', 'ell', 'eng', 'hin', 'ron', 'rus', 'spa', 'tha', 'tur', 'vie', 'zho'] Retrieval s2p [Web, Written] None None
XStance ['deu', 'fra', 'ita'] PairClassification s2s [Social, Written] None None
YahooAnswersTopicsClassification (Zhang et al., 2015) ['eng'] Classification s2s [Web, Written] None None
YelpReviewFullClassification (Zhang et al., 2015) ['eng'] Classification s2s [Reviews, Written] None None
YueOpenriceReviewClassification (Xiang et al., 2019) ['yue'] Classification s2s [Reviews, Spoken] None None
indonli ['ind'] PairClassification s2s [Encyclopaedic, News, Web, Written] None None
mFollowIRCrossLingualInstructionRetrieval (Weller et al., 2024) ['eng', 'fas', 'rus', 'zho'] Retrieval s2p [News, Written] {'test': 121758} {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283654099, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 75, 'min_instruction_length': 93, 'average_instruction_length': 389.95, 'max_instruction_length': 887, 'unique_instructions': 75, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 450.55, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'eng-fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129597567, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 80.08, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 150, 'average_instruction_length': 396.88, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 205, 'average_changed_instruction_length': 463.18, 'max_changed_instruction_length': 974, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109522175, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 32, 'average_query_length': 81.88, 'max_query_length': 173, 'unique_queries': 40, 'min_instruction_length': 93, 'average_instruction_length': 371.12, 'max_instruction_length': 887, 'unique_instructions': 40, 'min_changed_instruction_length': 180, 'average_changed_instruction_length': 431.8, 'max_changed_instruction_length': 957, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'eng-zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44534357, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 32, 'average_query_length': 83.56, 'max_query_length': 159, 'unique_queries': 43, 'min_instruction_length': 157, 'average_instruction_length': 401.02, 'max_instruction_length': 731, 'unique_instructions': 43, 'min_changed_instruction_length': 209, 'average_changed_instruction_length': 456.26, 'max_changed_instruction_length': 822, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}}
mFollowIRInstructionRetrieval (Weller et al., 2024) ['fas', 'rus', 'zho'] Retrieval s2p [News, Written] {'test': 121758} {'test': {'num_samples': 121758, 'num_docs': 121635, 'num_queries': 123, 'number_of_characters': 283622456, 'min_document_length': 74, 'average_document_length': 2331.08, 'max_document_length': 24179, 'unique_docs': 121635, 'min_query_length': 10, 'average_query_length': 57.11, 'max_query_length': 136, 'unique_queries': 123, 'min_instruction_length': 37, 'average_instruction_length': 281.07, 'max_instruction_length': 1009, 'unique_instructions': 123, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 326.94, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 123, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 10.43, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000, 'hf_subset_descriptive_stats': {'fas': {'num_samples': 41229, 'num_docs': 41189, 'num_queries': 40, 'number_of_characters': 129593838, 'min_document_length': 99, 'average_document_length': 3145.5, 'max_document_length': 24179, 'unique_docs': 41189, 'min_query_length': 34, 'average_query_length': 72.65, 'max_query_length': 124, 'unique_queries': 40, 'min_instruction_length': 121, 'average_instruction_length': 358.93, 'max_instruction_length': 759, 'unique_instructions': 40, 'min_changed_instruction_length': 163, 'average_changed_instruction_length': 415.32, 'max_changed_instruction_length': 842, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.85, 'max_average_relevant_docs_per_query': 22, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'rus': {'num_samples': 39366, 'num_docs': 39326, 'num_queries': 40, 'number_of_characters': 109523683, 'min_document_length': 75, 'average_document_length': 2784.08, 'max_document_length': 24061, 'unique_docs': 39326, 'min_query_length': 26, 'average_query_length': 77.5, 'max_query_length': 136, 'unique_queries': 40, 'min_instruction_length': 78, 'average_instruction_length': 387.0, 'max_instruction_length': 1009, 'unique_instructions': 40, 'min_changed_instruction_length': 187, 'average_changed_instruction_length': 458.0, 'max_changed_instruction_length': 1083, 'unique_changed_instructions': 40, 'min_average_relevant_docs_per_query': 0, 'average_relevant_docs_per_query': 9.78, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}, 'zho': {'num_samples': 41163, 'num_docs': 41120, 'num_queries': 43, 'number_of_characters': 44504935, 'min_document_length': 74, 'average_document_length': 1082.05, 'max_document_length': 23840, 'unique_docs': 41120, 'min_query_length': 10, 'average_query_length': 23.7, 'max_query_length': 44, 'unique_queries': 43, 'min_instruction_length': 37, 'average_instruction_length': 110.09, 'max_instruction_length': 209, 'unique_instructions': 43, 'min_changed_instruction_length': 44, 'average_changed_instruction_length': 122.81, 'max_changed_instruction_length': 229, 'unique_changed_instructions': 43, 'min_average_relevant_docs_per_query': 1, 'average_relevant_docs_per_query': 10.65, 'max_average_relevant_docs_per_query': 24, 'min_average_top_ranked_per_query': 1000, 'average_top_ranked_per_query': 1000.0, 'max_average_top_ranked_per_query': 1000}}}}
mMARCO-NL (Luiz Bonifacio and Israel Campiotti and Roberto de Alencar Lotufo and Rodrigo Frassetto Nogueira, 2021) ['nld'] Retrieval s2p [Web, Written] None None

Task Count per Language

| ISO Code | Language | Family | Any2AnyMultiChoice | Any2AnyMultilingualRetrieval | Any2AnyRetrieval | BitextMining | Classification | Clustering | Compositionality | DocumentUnderstanding | ImageClassification | ImageClustering | ImageMultilabelClassification | InstructionRetrieval | MultilabelClassification | PairClassification | Reranking | Retrieval | STS | Speed | Summarization | VisionCentric | VisualSTS(eng) | VisualSTS(multi) | ZeroShotClassification | Sum | |---|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|------|---| | aai | Arifama-Miniafia | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aak | Ankave | Angan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aau | Abau | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aaz | Amarasi | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | abs | Ambonese Malay | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | abt | Ambulas | Ndu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | abx | Inabaknon | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aby | Aneme Wake | Yareban | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ace | Achinese | Austronesian | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | acf | Saint Lucian Creole French | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | acm | Mesopotamian Arabic | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | acq | Ta'izzi-Adeni Arabic | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | acr | Achi | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | acu | Achuar-Shiwiar | Chicham | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | adz | Adzera | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aeb | Tunisian Arabic | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | aer | Eastern Arrernte | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aey | Amele | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | afr | Afrikaans | Indo-European | 0 | 0 | 0 | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | | agd | Agarabi | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | agg | Angor | Senagi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | agm | Angaataha | Angan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | agn | Agutaynen | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | agr | Aguaruna | Chicham | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | agt | Central Cagayan Agta | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | agu | Aguacateco | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aia | Arosi | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aii | Assyrian Neo-Aramaic | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ajp | South Levantine Arabic | Unclassified | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | aka | Akan | Atlantic-Congo | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ake | Akawaio | Cariban | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | alp | Alune | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | alq | Algonquin | Algic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | als | Tosk Albanian | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | aly | Alyawarr | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ame | Yanesha' | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amf | Hamer-Banna | South Omotic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amh | Amharic | Afro-Asiatic | 0 | 0 | 0 | 3 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | | amk | Ambai | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amm | Ama (Papua New Guinea) | Left May | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amn | Amanab | Border | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amo | Amo | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amp | Alamblak | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amr | Amarakaeri | Harakmbut | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amu | Guerrero Amuzgo | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | amx | Anmatyerre | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ang | Old English (ca. 450-1100) | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | anh | Nend | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | anp | Angika | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | anv | Denya | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aoi | Anindilyakwa | Gunwinyguan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aoj | Mufian | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aom | Ömie | Koiarian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aon | Bumbita Arapesh | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | apb | Sa'a | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | apc | Levantine Arabic | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ape | Bukiyip | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | apn | Apinayé | Nuclear-Macro-Je | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | apr | Arop-Lokep | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | apu | Apurinã | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | apw | Western Apache | Athabaskan-Eyak-Tlingit | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | apz | Safeyoka | Angan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ara | Arabic | Unclassified | 0 | 2 | 0 | 4 | 12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 10 | 2 | 0 | 0 | 0 | 0 | 2 | 0 | 36 | | arb | Standard Arabic | Afro-Asiatic | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | are | Western Arrarnta | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | arl | Arabela | Zaparoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | arn | Mapudungun | Araucanian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | arp | Arapaho | Algic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | arq | Algerian Arabic | Afro-Asiatic | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ars | Najdi Arabic | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ary | Moroccan Arabic | Afro-Asiatic | 0 | 0 | 0 | 1 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | arz | Egyptian Arabic | Afro-Asiatic | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | asm | Assamese | Indo-European | 0 | 0 | 0 | 5 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | | aso | Dano | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ast | Asturian | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ata | Pele-Ata | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | atb | Zaiwa | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | atd | Ata Manobo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | atg | Ivbie North-Okpela-Arhe | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | att | Pamplona Atta | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | auc | Waorani | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | aui | Anuki | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | auy | Awiyaana | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | avt | Au | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | awa | Awadhi | Indo-European | 0 | 0 | 0 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | awb | Awa (Papua New Guinea) | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | awk | Awabakal | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | awx | Awara | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ayr | Central Aymara | Aymaran | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | azb | South Azerbaijani | Turkic | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | aze | Azerbaijani | Unclassified | 0 | 0 | 0 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | azg | San Pedro Amuzgos Amuzgo | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | azj | North Azerbaijani | Turkic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | azz | Highland Puebla Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bak | Bashkir | Turkic | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | bam | Bambara | Mande | 0 | 0 | 0 | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | ban | Balinese | Austronesian | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | bao | Waimaha | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bba | Baatonum | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bbb | Barai | Koiarian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bbc | Batak Toba | Austronesian | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | bbr | Girawa | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bch | Bariai | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bco | Kaluli | Bosavi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bdd | Bunama | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bea | Beaver | Athabaskan-Eyak-Tlingit | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bef | Benabena | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bel | Belarusian | Indo-European | 0 | 0 | 0 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | bem | Bemba (Zambia) | Atlantic-Congo | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ben | Bengali | Indo-European | 0 | 1 | 0 | 9 | 9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 32 | | beo | Beami | Bosavi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ber | Berber (Other) | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | beu | Blagar | Timor-Alor-Pantar | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bew | Betawi | Austronesian | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | bgc | Haryanvi | Indo-European | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | bgs | Tagabawa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bgt | Bughotu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bhb | Bhili | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bhd | Bhadrawahi | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bhg | Binandere | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bhl | Bimin | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bho | Bhojpuri | Indo-European | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | bhp | Bima | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | big | Biangai | Kunimaipan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bjj | Kanauji | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bjk | Barok | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bjn | Banjar | Austronesian | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | bjp | Fanamaket | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bjr | Binumarien | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bjv | Bedjond | Central Sudanic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bjz | Baruga | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bkd | Binukid | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bki | Baki | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bkq | Bakairí | Cariban | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bkx | Baikeno | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | blw | Balangao | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | blz | Balantak | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bmh | Kein | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bmk | Ghayavi | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bmr | Muinane | Boran | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bmu | Somba-Siawari | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bnp | Bola | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bns | Bundeli | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | boa | Bora | Boran | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bod | Tibetan | Sino-Tibetan | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | boj | Anjam | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bon | Bine | Eastern Trans-Fly | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bos | Bosnian | Indo-European | 0 | 0 | 0 | 3 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | box | Buamu | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | boy | Bodo (Central African Republic) | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bpr | Koronadal Blaan | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bps | Sarangani Blaan | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bqc | Boko (Benin) | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bqp | Busa | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bra | Braj | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bre | Breton | Indo-European | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | brx | Bodo (India) | Sino-Tibetan | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | bsj | Bangwinji | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bsn | Barasana-Eduria | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bsp | Baga Sitemu | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bss | Akoose | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bug | Buginese | Austronesian | 0 | 0 | 0 | 2 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | buk | Bugawac | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bul | Bulgarian | Indo-European | 0 | 1 | 0 | 5 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 17 | | bus | Bokobaru | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bvd | Baeggu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bvr | Burarra | Maningrida | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bxh | Buhutu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | byr | Baruya | Angan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | byx | Qaqet | Baining | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bzd | Bribri | Chibchan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bzh | Mapos Buang | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | bzj | Belize Kriol English | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | caa | Chortí | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cab | Garifuna | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cac | Chuj | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | caf | Southern Carrier | Athabaskan-Eyak-Tlingit | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cak | Kaqchikel | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cao | Chácobo | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cap | Chipaya | Uru-Chipaya | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | car | Galibi Carib | Cariban | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cat | Catalan | Indo-European | 0 | 0 | 0 | 5 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11 | | cav | Cavineña | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cax | Chiquitano | Chiquitano | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cbc | Carapana | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cbi | Chachi | Barbacoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cbk | Chavacano | Indo-European | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | cbr | Cashibo-Cacataibo | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cbs | Cashinahua | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cbt | Chayahuita | Cahuapanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cbu | Candoshi-Shapra | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cbv | Cacua | Kakua-Nukak | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cco | Comaltepec Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ceb | Cebuano | Austronesian | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | cek | Eastern Khumi Chin | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ces | Czech | Indo-European | 0 | 1 | 0 | 6 | 5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | | cgc | Kagayanen | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cha | Chamorro | Austronesian | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | chd | Highland Oaxaca Chontal | Tequistlatecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | chf | Tabasco Chontal | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | chk | Chuukese | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | chq | Quiotepec Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | chv | Chuvash | Turkic | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | chz | Ozumacín Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cjk | Chokwe | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | cjo | Ashéninka Pajonal | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cjv | Chuave | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ckb | Central Kurdish | Indo-European | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | cle | Lealao Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | clu | Caluyanun | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cme | Cerma | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cmn | Mandarin Chinese | Sino-Tibetan | 0 | 0 | 0 | 4 | 10 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 4 | 10 | 9 | 0 | 0 | 0 | 0 | 2 | 0 | 46 | | cmo | Central Mnong | Austroasiatic | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | cni | Asháninka | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cnl | Lalana Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cnt | Tepetotutla Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | code | unknown | Unclassified | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 37 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 41 | | cof | Colorado | Barbacoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | con | Cofán | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cop | Coptic | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cor | Cornish | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cot | Caquinte | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cpa | Palantla Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cpb | Ucayali-Yurúa Ashéninka | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cpc | Ajyíninka Apurucayali | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cpu | Pichis Ashéninka | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cpy | South Ucayali Ashéninka | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | crh | Crimean Tatar | Turkic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | crn | El Nayar Cora | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | crx | Carrier | Athabaskan-Eyak-Tlingit | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | csb | Kashubian | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cso | Sochiapam Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | csy | Siyin Chin | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cta | Tataltepec Chatino | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cth | Thaiphum Chin | Bookkeeping | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ctp | Western Highland Chatino | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ctu | Chol | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cub | Cubeo | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cuc | Usila Chinantec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cui | Cuiba | Guahiboan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cuk | San Blas Kuna | Chibchan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cut | Teutila Cuicatec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cux | Tepeuxila Cuicatec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cwe | Kwere | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cya | Nopala Chatino | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | cym | Welsh | Indo-European | 0 | 0 | 0 | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | daa | Dangaléat | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dad | Marik | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dah | Gwahatike | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dan | Danish | Indo-European | 0 | 2 | 0 | 7 | 9 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 28 | | ded | Dedua | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | deu | German | Indo-European | 0 | 2 | 0 | 8 | 14 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 7 | 2 | 20 | 4 | 0 | 0 | 0 | 0 | 4 | 0 | 69 | | dgc | Casiguran Dumagat Agta | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dgr | Dogrib | Athabaskan-Eyak-Tlingit | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dgz | Daga | Dagan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dhg | Dhangu-Djangu | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dif | Dieri | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dik | Southwestern Dinka | Nilotic | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | div | Dhivehi | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dji | Djinang | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | djk | Eastern Maroon Creole | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | djr | Djambarrpuyngu | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dob | Dobu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | doi | Dogri (macrolanguage) | Unclassified | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | dop | Lukpa | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dov | Dombe | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dsb | Lower Sorbian | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dtp | Kadazan Dusun | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dwr | Dawro | Ta-Ne-Omotic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dww | Dawawa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dwy | Dhuwaya | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dyu | Dyula | Mande | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | dza | Tunzu | Atlantic-Congo | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | dzo | Dzongkha | Sino-Tibetan | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ebk | Eastern Bontok | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | eko | Koti | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ell | Modern Greek (1453-) | Indo-European | 0 | 2 | 0 | 5 | 6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | | emi | Mussau-Emira | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | emp | Northern Emberá | Chocoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | eng | English | Indo-European | 0 | 3 | 49 | 19 | 160 | 21 | 7 | 10 | 22 | 5 | 0 | 3 | 1 | 13 | 9 | 113 | 13 | 2 | 1 | 6 | 7 | 3 | 24 | 491 | | enq | Enga | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | epo | Esperanto | Artificial Language | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | eri | Ogea | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ese | Ese Ejja | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | esk | Northwest Alaska Inupiatun | Eskimo-Aleut | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | est | Estonian | Uralic | 0 | 1 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | | etr | Edolo | Bosavi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | eus | Basque | Unclassified | 0 | 0 | 0 | 3 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | ewe | Ewe | Atlantic-Congo | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | faa | Fasu | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fai | Faiwol | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fao | Faroese | Indo-European | 0 | 0 | 0 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | far | Fataleka | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fas | Persian | Indo-European | 0 | 1 | 0 | 6 | 28 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 2 | 41 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 94 | | ffm | Maasina Fulfulde | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fij | Fijian | Austronesian | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | fil | Filipino | Austronesian | 0 | 1 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | fin | Finnish | Uralic | 0 | 1 | 0 | 5 | 5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 2 | 6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 23 | | fon | Fon | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | for | Fore | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fra | French | Indo-European | 0 | 1 | 0 | 9 | 13 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 6 | 3 | 17 | 4 | 0 | 1 | 0 | 0 | 4 | 0 | 67 | | fry | Western Frisian | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fuc | Pulaar | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fue | Borgu Fulfulde | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fuf | Pular | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fuh | Western Niger Fulfulde | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | fur | Friulian | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | fuv | Nigerian Fulfulde | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | gah | Alekano | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gai | Borei | Ramu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gam | Kandawo | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gaw | Nobonob | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gaz | West Central Oromo | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | gbm | Garhwali | Indo-European | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | gdn | Umanakaina | Dagan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gdr | Wipi | Eastern Trans-Fly | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | geb | Kire | Ramu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gfk | Patpatar | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ghs | Guhu-Samane | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gla | Scottish Gaelic | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | gle | Irish | Indo-European | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | glg | Galician | Indo-European | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | glk | Gilaki | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | glv | Manx | Indo-European | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gmv | Gamo | Ta-Ne-Omotic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gng | Ngangam | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gnn | Gumatj | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gnw | Western Bolivian Guaraní | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gof | Gofa | Ta-Ne-Omotic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gom | Goan Konkani | Indo-European | 0 | 0 | 0 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | grc | Ancient Greek (to 1453) | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | grn | Guarani | Unclassified | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | gsw | Swiss German | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gub | Guajajára | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | guh | Guahibo | Guahiboan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gui | Eastern Bolivian Guaraní | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | guj | Gujarati | Indo-European | 0 | 0 | 0 | 6 | 6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | | gul | Sea Island Creole English | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gum | Guambiano | Barbacoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gun | Mbyá Guaraní | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | guo | Guayabero | Guahiboan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gup | Gunwinggu | Gunwinyguan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gux | Gourmanchéma | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gvc | Guanano | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gvf | Golin | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gvn | Kuku-Yalanji | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gvs | Gumawana | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gwi | Gwichʼin | Athabaskan-Eyak-Tlingit | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gym | Ngäbere | Chibchan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | gyr | Guarayu | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hat | Haitian | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | hau | Hausa | Afro-Asiatic | 0 | 0 | 0 | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | | haw | Hawaiian | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hbo | Ancient Hebrew | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hch | Huichol | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | heb | Hebrew | Afro-Asiatic | 0 | 1 | 0 | 6 | 5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | | heg | Helong | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hin | Hindi | Indo-European | 0 | 1 | 0 | 11 | 12 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 2 | 11 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 43 | | hix | Hixkaryána | Cariban | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hla | Halia | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hlt | Matu Chin | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hmn | Hmong | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hmo | Hiri Motu | Pidgin | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hne | Chhattisgarhi | Indo-European | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | hns | Caribbean Hindustani | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hop | Hopi | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hot | Hote | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hrv | Croatian | Indo-European | 0 | 1 | 0 | 6 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14 | | hsb | Upper Sorbian | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hto | Minica Huitoto | Huitotoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hub | Huambisa | Chicham | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hui | Huli | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hun | Hungarian | Uralic | 0 | 1 | 0 | 7 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | | hus | Huastec | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | huu | Murui Huitoto | Huitotoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | huv | San Mateo Del Mar Huave | Huavean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hvn | Sabu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | hye | Armenian | Indo-European | 0 | 0 | 0 | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | | ian | Iatmul | Ndu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ibo | Igbo | Atlantic-Congo | 0 | 0 | 0 | 3 | 5 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | | ido | Ido | Artificial Language | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ign | Ignaciano | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ikk | Ika | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ikw | Ikwere | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ile | Interlingue | Artificial Language | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ilo | Iloko | Austronesian | 0 | 0 | 0 | 2 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | imo | Imbongu | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ina | Interlingua (International Auxiliary Language Association) | Artificial Language | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | inb | Inga | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ind | Indonesian | Austronesian | 0 | 3 | 0 | 8 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 5 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 27 | | ino | Inoke-Yate | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | iou | Tuma-Irumu | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ipi | Ipili | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | isl | Icelandic | Indo-European | 0 | 0 | 0 | 5 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | | isn | Isanzu | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ita | Italian | Indo-European | 0 | 1 | 0 | 7 | 9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 1 | 7 | 3 | 0 | 0 | 0 | 0 | 4 | 0 | 36 | | iws | Sepik Iwam | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ixl | Ixil | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jac | Popti' | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jae | Yabem | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jao | Yanyuwa | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jav | Javanese | Austronesian | 0 | 0 | 0 | 4 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13 | | jic | Tol | Jicaquean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jid | Bu (Kaduna State) | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jiv | Shuar | Chicham | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jni | Janji | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | jpn | Japanese | Japonic | 0 | 3 | 0 | 7 | 8 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 14 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 42 | | jvn | Caribbean Javanese | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kab | Kabyle | Afro-Asiatic | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | kac | Kachin | Sino-Tibetan | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | kam | Kamba (Kenya) | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | kan | Kannada | Dravidian | 0 | 0 | 0 | 6 | 7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 19 | | kaq | Capanahua | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kas | Kashmiri | Indo-European | 0 | 0 | 0 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | kat | Georgian | Kartvelian | 0 | 0 | 0 | 6 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13 | | kaz | Kazakh | Turkic | 0 | 0 | 0 | 5 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11 | | kbc | Kadiwéu | Guaicuruan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kbh | Camsá | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kbm | Iwal | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kbp | Kabiyè | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | kbq | Kamano | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kdc | Kutu | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kde | Makonde | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kdl | Tsikimba | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kea | Kabuverdianu | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | kek | Kekchí | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ken | Kenyang | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kew | West Kewa | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kfg | Kudiya | Dravidian | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kfy | Kumaoni | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kgf | Kube | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kgk | Kaiwá | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kgp | Kaingang | Nuclear-Macro-Je | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | khk | Halh Mongolian | Mongolic-Khitan | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | khm | Khmer | Austroasiatic | 0 | 0 | 0 | 3 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | khs | Kasua | Bosavi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | khz | Keapara | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kik | Kikuyu | Atlantic-Congo | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | kin | Kinyarwanda | Atlantic-Congo | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | kir | Kirghiz | Turkic | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | kiw | Northeast Kiwai | Kiwaian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kiz | Kisi | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kje | Kisar | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kjs | East Kewa | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kkc | Odoodee | East Strickland | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kkl | Kosarek Yale | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | klt | Nukna | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | klv | Maskelynes | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kmb | Kimbundu | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | kmg | Kâte | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kmh | Kalam | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kmk | Limos Kalinga | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kmo | Kwoma | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kmr | Northern Kurdish | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | kms | Kamasau | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kmu | Kanite | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | knc | Central Kanuri | Saharan | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | kne | Kankanaey | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | knf | Mankanya | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | knj | Western Kanjobal | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | knv | Tabo | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kon | Kongo | Unclassified | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | kor | Korean | Koreanic | 0 | 2 | 0 | 6 | 8 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 1 | 10 | 3 | 0 | 0 | 0 | 0 | 2 | 0 | 39 | | kos | Kosraean | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpf | Komba | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpg | Kapingamarangi | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpj | Karajá | Nuclear-Macro-Je | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpr | Korafe-Yegha | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpw | Kobon | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kpx | Mountain Koiali | Koiarian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kqa | Mum | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kqc | Doromu-Koki | Manubaran | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kqf | Kakabai | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kql | Kyenele | Yuat | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kqw | Kandas | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | krc | Karachay-Balkar | Turkic | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ksd | Kuanua | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ksj | Uare | Kwalean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ksr | Borong | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ktm | Kurti | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kto | Kuot | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kud | 'Auhelawa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kue | Kuman (Papua New Guinea) | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kup | Kunimaipa | Kunimaipan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kur | Kurdish | Unclassified | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | kvg | Kuni-Boazi | Anim | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kvn | Border Kuna | Chibchan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kwd | Kwaio | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kwf | Kwara'ae | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kwi | Awa-Cuaiquer | Barbacoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kwj | Kwanga | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kyc | Kyaka | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kyf | Kouya | Kru | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kyg | Keyagana | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kyq | Kenga | Central Sudanic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kyz | Kayabí | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kze | Kosena | Bookkeeping | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | kzj | Coastal Kadazan | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lac | Lacandon | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lao | Lao | Tai-Kadai | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | lat | Latin | Indo-European | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | lav | Latvian | Indo-European | 0 | 0 | 0 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | lbb | Label | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lbk | Central Bontok | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lcm | Tungag | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | leu | Kara (Papua New Guinea) | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lex | Luang | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lfn | Lingua Franca Nova | Artificial Language | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lgl | Wala | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lid | Nyindrou | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lif | Limbu | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lij | Ligurian | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | lim | Limburgan | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | lin | Lingala | Atlantic-Congo | 0 | 0 | 0 | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | lit | Lithuanian | Indo-European | 0 | 0 | 0 | 6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11 | | llg | Lole | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | lmo | Lombard | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | ltg | Latgalian | Unclassified | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | ltz | Luxembourgish | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | lua | Luba-Lulua | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | lug | Ganda | Atlantic-Congo | 0 | 0 | 0 | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | luo | Luo (Kenya and Tanzania) | Nilotic | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | lus | Lushai | Sino-Tibetan | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | lvs | Standard Latvian | Unclassified | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | lww | Lewo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | maa | San Jerónimo Tecóatl Mazatec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mad | Madurese | Austronesian | 0 | 0 | 0 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | mag | Magahi | Indo-European | 0 | 0 | 0 | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | mai | Maithili | Indo-European | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | maj | Jalapa De Díaz Mazatec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mak | Makasar | Austronesian | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | mal | Malayalam | Dravidian | 0 | 0 | 0 | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 19 | | mam | Mam | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | maq | Chiquihuitlán Mazatec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mar | Marathi | Indo-European | 0 | 0 | 0 | 9 | 6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 23 | | mau | Huautla Mazatec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mav | Sateré-Mawé | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | max | North Moluccan Malay | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | maz | Central Mazahua | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mbb | Western Bukidnon Manobo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mbc | Macushi | Cariban | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mbh | Mangseng | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mbj | Nadëb | Naduhup | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mbl | Maxakalí | Nuclear-Macro-Je | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mbs | Sarangani Manobo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mbt | Matigsalug Manobo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mca | Maca | Mataguayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mcb | Machiguenga | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mcd | Sharanahua | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mcf | Matsés | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mco | Coatlán Mixe | Mixe-Zoque | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mcp | Makaa | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mcq | Ese | Koiarian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mcr | Menya | Angan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mdy | Male (Ethiopia) | Ta-Ne-Omotic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | med | Melpa | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mee | Mengen | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mek | Mekeo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | meq | Merey | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | met | Mato | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | meu | Motu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mey | Hassaniyya | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mgc | Morokodo | Central Sudanic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mgh | Makhuwa-Meetto | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mgw | Matumbi | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mhl | Mauwake | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mhr | Eastern Mari | Uralic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mib | Atatláhuca Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mic | Mi'kmaq | Algic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mie | Ocotepec Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mig | San Miguel El Grande Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mih | Chayuco Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mil | Peñoles Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | min | Minangkabau | Austronesian | 0 | 0 | 0 | 3 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | | mio | Pinotepa Nacional Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mir | Isthmus Mixe | Mixe-Zoque | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mit | Southern Puebla Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | miz | Coatzospan Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mjc | San Juan Colorado Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mkd | Macedonian | Indo-European | 0 | 0 | 0 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | mkj | Mokilese | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mkl | Mokole | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mkn | Kupang Malay | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mks | Silacayoapan Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mle | Manambu | Ndu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mlg | Malagasy | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mlh | Mape | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mlp | Bargam | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mlt | Maltese | Afro-Asiatic | 0 | 0 | 0 | 2 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | | mmo | Mangga Buang | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mmx | Madak | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mna | Mbula | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mni | Manipuri | Sino-Tibetan | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | mon | Mongolian | Unclassified | 0 | 0 | 0 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | mop | Mopán Maya | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mos | Mossi | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | mox | Molima | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mph | Maung | Iwaidjan Proper | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mpj | Martu Wangka | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mpm | Yosondúa Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mpp | Migabac | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mps | Dadibi | Teberan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mpt | Mian | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mpx | Misima-Panaeati | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mqb | Mbuko | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mqj | Mamasa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mri | Maori | Austronesian | 0 | 1 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | msa | Malay (macrolanguage) | Unclassified | 0 | 0 | 0 | 3 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | msb | Masbatenyo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msc | Sankaran Maninka | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msk | Mansaka | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msm | Agusan Manobo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | msy | Aruamu | Ramu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mti | Maiwa (Papua New Guinea) | Dagan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mto | Totontepec Mixe | Mixe-Zoque | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mui | Musi | Austronesian | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | mup | Malvi | Indo-European | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | mux | Bo-Ung | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | muy | Muyang | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mva | Manam | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mvn | Minaveha | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mwc | Are | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mwe | Mwera (Chimwera) | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mwf | Murrinh-Patha | Southern Daly | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mwp | Kala Lagaw Ya | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mwr | Marwari | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mxb | Tezoatlán Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mxp | Tlahuitoltepec Mixe | Mixe-Zoque | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mxq | Juquila Mixe | Mixe-Zoque | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mxt | Jamiltepec Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mya | Burmese | Sino-Tibetan | 0 | 0 | 0 | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | | myk | Mamara Senoufo | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | myu | Mundurukú | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | myw | Muyuw | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | myy | Macuna | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | mzz | Maiadomu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nab | Southern Nambikuára | Nambiquaran | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | naf | Nabak | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nak | Nakanai | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nas | Naasioi | South Bougainville | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nbl | South Ndebele | Unclassified | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nbq | Nggem | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nca | Iyo | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nch | Central Huasteca Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ncj | Northern Puebla Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ncl | Michoacán Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ncu | Chumburung | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nde | North Ndebele | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ndg | Ndengereko | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ndj | Ndamba | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nds | Low German | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nep | Nepali (macrolanguage) | Unclassified | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | nfa | Dhao | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ngp | Ngulu | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ngu | Guerrero Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nhe | Eastern Huasteca Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nhg | Tetelcingo Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nhi | Zacatlán-Ahuacatlán-Tepetzintla Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nho | Takuu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nhr | Naro | Khoe-Kwadi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nhu | Noone | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nhw | Western Huasteca Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nhy | Northern Oaxaca Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nif | Nek | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nii | Nii | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nij | Ngaju | Austronesian | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | nin | Ninzo | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nko | Nkonya | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nld | Dutch | Indo-European | 0 | 1 | 0 | 8 | 6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 30 | 2 | 0 | 0 | 0 | 0 | 4 | 0 | 55 | | nlg | Gela | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nna | Nyangumarta | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nno | Norwegian Nynorsk | Unclassified | 0 | 0 | 0 | 4 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | nnq | Ngindo | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | noa | Woun Meu | Chocoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nob | Norwegian Bokmål | Unclassified | 0 | 0 | 0 | 4 | 7 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 19 | | noe | Nimadi | Indo-European | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nop | Numanggang | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nor | Norwegian | Indo-European | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | not | Nomatsiguenga | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nou | Ewage-Notu | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nov | Novial | Artificial Language | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | npi | Nepali (individual language) | Indo-European | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | npl | Southeastern Puebla Nahuatl | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nqo | N'Ko | Artificial Language | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | nsn | Nehan | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nso | Pedi | Atlantic-Congo | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | nss | Nali | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ntj | Ngaanyatjarra | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ntp | Northern Tepehuan | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ntu | Natügu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nus | Nuer | Nilotic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | nuy | Nunggubuyu | Gunwinyguan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nvm | Namiae | Koiarian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nwi | Southwest Tanna | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nya | Nyanja | Atlantic-Congo | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | nys | Nyungar | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | nyu | Nyungwe | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | obo | Obo Manobo | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | oci | Occitan (post 1500) | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | okv | Orokaiva | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | omw | South Tairora | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ong | Olo | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ons | Ono | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ood | Tohono O'odham | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | opm | Oksapmin | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ori | Oriya (macrolanguage) | Unclassified | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | orm | Oromo | Unclassified | 0 | 0 | 0 | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | orv | Old Russian | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ory | Odia | Indo-European | 0 | 0 | 0 | 5 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | | ote | Mezquital Otomi | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | otm | Eastern Highland Otomi | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | otn | Tenango Otomi | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | otq | Querétaro Otomi | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ots | Estado de México Otomi | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pab | Parecís | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pad | Paumarí | Arawan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pag | Pangasinan | Austronesian | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | pah | Tenharim | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pam | Pampanga | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pan | Panjabi | Indo-European | 0 | 0 | 0 | 6 | 6 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 18 | | pao | Northern Paiute | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pap | Papiamento | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | pbt | Southern Pashto | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | pcm | Nigerian Pidgin | Indo-European | 0 | 0 | 0 | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | pes | Iranian Persian | Indo-European | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | pib | Yine | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pio | Piapoco | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pir | Piratapuyo | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | piu | Pintupi-Luritja | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pjt | Pitjantjatjara | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pls | San Marcos Tlacoyalco Popoloca | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | plt | Plateau Malagasy | Austronesian | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | plu | Palikúr | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pma | Paama | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pms | Piemontese | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | poe | San Juan Atzingo Popoloca | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | poh | Poqomchi' | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | poi | Highland Popoluca | Mixe-Zoque | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pol | Polish | Indo-European | 0 | 1 | 0 | 6 | 11 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 4 | 0 | 19 | 4 | 0 | 0 | 0 | 0 | 2 | 0 | 52 | | pon | Pohnpeian | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | por | Portuguese | Indo-European | 0 | 1 | 0 | 6 | 9 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 1 | 6 | 3 | 0 | 0 | 0 | 0 | 2 | 0 | 34 | | poy | Pogolo | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ppo | Folopa | Teberan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | prf | Paranan | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pri | Paicî | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | prs | Dari | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | ptp | Patep | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ptu | Bambam | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | pus | Pushto | Unclassified | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | pwg | Gapapaiwa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qub | Huallaga Huánuco Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | quc | K'iche' | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | quf | Lambayeque Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | quh | South Bolivian Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qul | North Bolivian Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qup | Southern Pastaza Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | quy | Ayacucho Quechua | Quechuan | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | quz | Cusco Quechua | Quechuan | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qvc | Cajamarca Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qve | Eastern Apurímac Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qvh | Huamalíes-Dos de Mayo Huánuco Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qvm | Margos-Yarowilca-Lauricocha Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qvn | North Junín Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qvs | San Martín Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qvw | Huaylla Wanca Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qvz | Northern Pastaza Quichua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qwh | Huaylas Ancash Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qxh | Panao Huánuco Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qxn | Northern Conchucos Ancash Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | qxo | Southern Conchucos Ancash Quechua | Quechuan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rai | Ramoaaina | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | raj | Rajasthani | Unclassified | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | reg | Kara (Tanzania) | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rej | Rejang | Austronesian | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | rgu | Ringgou | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rkb | Rikbaktsa | Nuclear-Macro-Je | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rmc | Carpathian Romani | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rmy | Vlax Romani | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rom | Romany | Unclassified | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | ron | Romanian | Indo-European | 0 | 1 | 0 | 7 | 6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | | roo | Rotokas | North Bougainville | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rop | Kriol | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | row | Dela-Oenale | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rro | Waima | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ruf | Luguru | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | rug | Roviana | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | run | Rundi | Atlantic-Congo | 0 | 0 | 0 | 1 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | rus | Russian | Indo-European | 0 | 2 | 0 | 7 | 13 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 4 | 2 | 17 | 4 | 0 | 0 | 0 | 0 | 2 | 0 | 59 | | rwo | Rawa | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sab | Buglere | Chibchan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sag | Sango | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | sah | Yakut | Turkic | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | san | Sanskrit | Indo-European | 0 | 0 | 0 | 5 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | | sat | Santali | Austroasiatic | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | sbe | Saliba | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sbk | Safwa | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sbs | Subiya | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | scn | Sicilian | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | sco | Scots | Indo-European | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | seh | Sena | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sey | Secoya | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sgb | Mag-antsi Ayta | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sgz | Sursurunga | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | shi | Tachelhit | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | shj | Shatt | Dajuic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | shn | Shan | Tai-Kadai | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | shp | Shipibo-Conibo | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sim | Mende (Papua New Guinea) | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sin | Sinhala | Indo-European | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | sja | Epena | Chocoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | slk | Slovak | Indo-European | 0 | 0 | 0 | 5 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15 | | sll | Salt-Yui | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | slv | Slovenian | Indo-European | 0 | 0 | 0 | 5 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13 | | smk | Bolinao | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | smo | Samoan | Austronesian | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | sna | Shona | Atlantic-Congo | 0 | 0 | 0 | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | snc | Sinaugoro | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | snd | Sindhi | Indo-European | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | snn | Siona | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | snp | Siane | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | snx | Sam | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sny | Saniyo-Hiyewe | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | som | Somali | Afro-Asiatic | 0 | 0 | 0 | 3 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | | soq | Kanasi | Dagan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sot | Southern Sotho | Atlantic-Congo | 0 | 0 | 0 | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | soy | Miyobe | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spa | Spanish | Indo-European | 0 | 2 | 0 | 6 | 13 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 2 | 15 | 4 | 0 | 0 | 0 | 0 | 4 | 0 | 54 | | spl | Selepet | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spm | Akukem | Ramu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spp | Supyire Senoufo | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sps | Saposa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | spy | Sabaot | Nilotic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sqi | Albanian | Unclassified | 0 | 0 | 0 | 2 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | srd | Sardinian | Unclassified | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | sri | Siriano | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | srm | Saramaccan | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | srn | Sranan Tongo | Indo-European | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | srp | Serbian | Indo-European | 0 | 0 | 0 | 6 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | | srq | Sirionó | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ssd | Siroi | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ssg | Seimat | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ssw | Swati | Atlantic-Congo | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | ssx | Samberigi | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | stp | Southeastern Tepehuan | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sua | Sulka | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sue | Suena | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sun | Sundanese | Austronesian | 0 | 0 | 0 | 3 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | | sus | Susu | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | suz | Sunwar | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | svk | Slovakian Sign Language | Sign Language | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | swa | Swahili (macrolanguage) | Atlantic-Congo | 0 | 1 | 0 | 1 | 7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | | swe | Swedish | Indo-European | 0 | 1 | 0 | 6 | 8 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26 | | swg | Swabian | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | swh | Swahili (individual language) | Atlantic-Congo | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | swp | Suau | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | sxb | Suba | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | szl | Silesian | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | tac | Lowland Tarahumara | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tah | Tahitian | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | taj | Eastern Tamang | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tam | Tamil | Dravidian | 0 | 0 | 0 | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | | taq | Tamasheq | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | tat | Tatar | Turkic | 0 | 0 | 0 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | tav | Tatuyo | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | taw | Tai | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tbc | Takia | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tbf | Mandara | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tbg | North Tairora | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tbo | Tawala | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tbz | Ditammari | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tca | Ticuna | Ticuna-Yuri | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tcs | Torres Strait Creole | Indo-European | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tcz | Thado Chin | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tdt | Tetun Dili | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tee | Huehuetla Tepehua | Totonacan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tel | Telugu | Dravidian | 0 | 1 | 0 | 7 | 7 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 25 | | ter | Tereno | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tet | Tetum | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tew | Tewa (USA) | Kiowa-Tanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tfr | Teribe | Chibchan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tgk | Tajik | Indo-European | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | tgl | Tagalog | Austronesian | 0 | 0 | 0 | 5 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11 | | tgo | Sudest | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tgp | Tangoa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tha | Thai | Tai-Kadai | 0 | 1 | 0 | 6 | 8 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 25 | | tif | Tifal | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tim | Timbe | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tir | Tigrinya | Afro-Asiatic | 0 | 0 | 0 | 2 | 2 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | | tiw | Tiwi | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tiy | Tiruray | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tke | Takwane | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tku | Upper Necaxa Totonac | Totonacan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tlf | Telefol | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tmd | Haruai | Piawi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tna | Tacana | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tnc | Tanimuca-Retuarã | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tnk | Kwamera | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tnn | North Tanna | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tnp | Whitesands | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | toc | Coyutla Totonac | Totonacan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tod | Toma | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tof | Gizrra | Eastern Trans-Fly | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | toj | Tojolabal | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ton | Tonga (Tonga Islands) | Austronesian | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | too | Xicotepec De Juárez Totonac | Totonacan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | top | Papantla Totonac | Totonacan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tos | Highland Totonac | Totonacan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tpa | Taupota | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tpi | Tok Pisin | Indo-European | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | tpt | Tlachichilco Tepehua | Totonacan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tpz | Tinputz | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | trc | Copala Triqui | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tsn | Tswana | Atlantic-Congo | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | tso | Tsonga | Atlantic-Congo | 0 | 0 | 0 | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | tsw | Tsishingini | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ttc | Tektiteko | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tte | Bwanabwana | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tuc | Mutu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tue | Tuyuca | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tuf | Central Tunebo | Chibchan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tuk | Turkmen | Turkic | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | tum | Tumbuka | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | tuo | Tucano | Tucanoan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tur | Turkish | Turkic | 0 | 3 | 0 | 6 | 7 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 4 | 2 | 0 | 0 | 0 | 0 | 2 | 0 | 28 | | tvk | Southeast Ambrym | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | twi | Twi | Unclassified | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | txq | Tii | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | txu | Kayapó | Nuclear-Macro-Je | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tyv | Tuvinian | Turkic | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tzj | Tz'utujil | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tzl | Talossan | Artificial Language | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | tzm | Central Atlas Tamazight | Afro-Asiatic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | tzo | Tzotzil | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ubr | Ubir | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ubu | Umbu-Ungu | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | udu | Uduk | Koman | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | uig | Uighur | Turkic | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | ukr | Ukrainian | Indo-European | 0 | 1 | 0 | 6 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 12 | | uli | Ulithian | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ulk | Meriam Mir | Eastern Trans-Fly | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | umb | Umbundu | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | upv | Uripiv-Wala-Rano-Atchin | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ura | Urarina | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | urb | Urubú-Kaapor | Tupian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | urd | Urdu | Indo-European | 0 | 0 | 0 | 9 | 8 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 22 | | uri | Urim | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | urt | Urat | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | urw | Sop | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | usa | Usarufa | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | usp | Uspanteco | Mayan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | uvh | Uri | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | uvl | Lote | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | uzb | Uzbek | Unclassified | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | uzn | Northern Uzbek | Turkic | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | | vec | Venetian | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | ven | Venda | Atlantic-Congo | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | | vid | Vidunda | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | vie | Vietnamese | Austroasiatic | 0 | 2 | 0 | 7 | 6 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 23 | | viv | Iduna | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | vmy | Ayautla Mazatec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | waj | Waffa | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wal | Wolaytta | Ta-Ne-Omotic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wap | Wapishana | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | war | Waray (Philippines) | Austronesian | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | wat | Kaninuwa | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wbi | Vwanji | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wbp | Warlpiri | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wed | Wedau | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wer | Weri | Kunimaipan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wim | Wik-Mungkan | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wiu | Wiru | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wiv | Vitu | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wln | Walloon | Indo-European | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wmt | Walmajarri | Pama-Nyungan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wmw | Mwani | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wnc | Wantoat | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wnu | Usan | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wol | Wolof | Atlantic-Congo | 0 | 0 | 0 | 3 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | wos | Hanga Hundi | Ndu | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wrk | Garrwa | Garrwan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wro | Worrorra | Worrorran | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wrs | Waris | Border | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wsk | Waskia | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wuu | Wu Chinese | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | wuv | Wuvulu-Aua | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xav | Xavánte | Nuclear-Macro-Je | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xbi | Kombio | Nuclear Torricelli | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xed | Hdi | Afro-Asiatic | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xho | Xhosa | Atlantic-Congo | 0 | 0 | 0 | 3 | 3 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | | xla | Kamula | Kamula-Elevala | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xnn | Northern Kankanay | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xon | Konkomba | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xsi | Sio | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xtd | Diuxi-Tilantongo Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | xtm | Magdalena Peñasco Mixtec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yaa | Yaminahua | Pano-Tacanan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yad | Yagua | Peba-Yagua | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yal | Yalunka | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yap | Yapese | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yaq | Yaqui | Uto-Aztecan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yby | Yaweyuha | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ycn | Yucuna | Arawakan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ydd | Eastern Yiddish | Indo-European | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | | yid | Yiddish | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yka | Yakan | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yle | Yele | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yml | Iamalele | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yon | Yongkom | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yor | Yoruba | Atlantic-Congo | 0 | 0 | 0 | 4 | 5 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16 | | yrb | Yareba | Yareban | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yre | Yaouré | Mande | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yss | Yessan-Mayo | Sepik | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yue | Yue Chinese | Sino-Tibetan | 0 | 0 | 0 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | | yuj | Karkar-Yuri | Pauwasi | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yut | Yopno | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yuw | Yau (Morobe Province) | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | yva | Yawa | Yawa-Saweru | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zaa | Sierra de Juárez Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zab | Western Tlacolula Valley Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zac | Ocotlán Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zad | Cajonos Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zai | Isthmus Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zaj | Zaramo | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zam | Miahuatlán Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zao | Ozolotepec Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zap | Zapotec | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zar | Rincón Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zas | Santo Domingo Albarradas Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zat | Tabaa Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zav | Yatzachi Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zaw | Mitla Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zca | Coatecas Altas Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zga | Kinga | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zho | Chinese | Unclassified | 0 | 2 | 0 | 4 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 26 | | zia | Zia | Nuclear Trans New Guinea | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ziw | Zigula | Atlantic-Congo | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zlm | Malay (individual language) | Austronesian | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zos | Francisco León Zoque | Mixe-Zoque | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpc | Choapan Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpl | Lachixío Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpm | Mixtepec Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpo | Amatlán Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpq | Zoogocho Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpu | Yalálag Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpv | Chichicapan Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zpz | Texmelucan Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zsm | Standard Malay | Austronesian | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | | zsr | Southern Rincon Zapotec | Unclassified | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | ztq | Quioquitani-Quierí Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zty | Yatee Zapotec | Otomanguean | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | zul | Zulu | Atlantic-Congo | 0 | 0 | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | | zyp | Zyphe Chin | Sino-Tibetan | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | | Total | None | None | None | 0 | 55 | 49 | 1492 | 836 | 316 | 7 | 10 | 22 | 5 | 0 | 3 | 28 | 91 | 56 | 591 | 88 | 2 | 2 | 6 | 7 | 37 | 24 |