In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score

In [113]:
import re

In [None]:
## Loading data

In [7]:
labels_file = 'wili-2018/labels.csv'
x_train_file = 'wili-2018/x_train.txt'
y_train_file = 'wili-2018/y_train.txt'
x_test_file = 'wili-2018/x_test.txt'
y_test_file = 'wili-2018/y_test.txt'

In [8]:
def read_file(x, y):
    
    # Read contents of 'y_file' into a dataframe
    y_df = pd.read_csv(y_file, header=None)
    # y_df has only one column; name it 'Label'
    y_df.columns = ['Label']

    # Read contents of 'x_file' into a list of strings
    with open(x_file, encoding='utf8') as f:
        x_pars = f.readlines()
    
    # Remove all whitespace characters (such as '\n') from the beginning and the end of the strings
    x_pars = [t.strip() for t in x_pars]
    # Convert the list into a dataframe, with one column: 'Par'
    x_df = pd.DataFrame(x_pars, columns=['Par']) 
    return (x_df, y_df)

In [9]:
x_train, y_train = read_file(x_train_file, y_train_file)

In [139]:
labels_df = pd.read_csv(labels_file, sep=';')
labels_df

Unnamed: 0,Label,English,Wiki Code,ISO 369-3,German,Language family,Writing system,Remarks,Synonyms
0,ace,Achinese,ace,ace,Achinesisch,Austronesian,,,
1,afr,Afrikaans,af,afr,Afrikaans,Indo-European,,,
2,als,Alemannic German,als,gsw,Alemannisch,Indo-European,,(ursprünglich nur Elsässisch),
3,amh,Amharic,am,amh,Amharisch,Afro-Asiatic,,,
4,ang,Old English,ang,ang,Altenglisch,Indo-European,,(ca. 450-1100),Angelsächsisch
5,ara,Arabic,ar,ara,Arabisch,Afro-Asiatic,,,
6,arg,Aragonese,an,arg,Aragonesisch,Indo-European,,,
7,arz,Egyptian Arabic,arz,arz,Ägyptisch-Arabisch,Afro-Asiatic,,,
8,asm,Assamese,as,asm,Assamesisch,Indo-European,,,
9,ast,Asturian,ast,ast,Asturisch,Indo-European,,,


In [63]:
y_tr_1k = y_train[0:1000]

In [66]:
y_tr_new = y_tr_1k.merge(labels_df, on = "Label", how = "left")
y_tr_new_1k = y_tr_new['English']

In [69]:
train_list = list(y_tr_new_1k)

In [135]:
y_tr_new_1k_2 = y_tr_new['Wiki Code']

In [136]:
y_tr_new_1k_2

0             et
1             sv
2            mai
3             oc
4             th
5             om
6             li
7             gu
8            pnb
9            zea
10           krc
11            ht
12           pcd
13            ta
14            vi
15            pa
16           szl
17           ckb
18           fur
19           wuu
20           arz
21            to
22            eu
23       map-bms
24           glk
25       map-bms
26            nl
27           arz
28            bo
29            ja
         ...    
970          scn
971           yo
972           vo
973           af
974          zea
975           qu
976          udm
977      cbk-zam
978           lv
979           cs
980           cs
981          wuu
982           af
983          zea
984           tr
985          vls
986           wo
987          als
988          pnb
989          pam
990          pap
991          mhr
992           sa
993    be-tarask
994          cdo
995          tcy
996           bn
997           

In [None]:
## Spacy 

In [19]:
#!pip install spacy-langdetect
#!python -m spacy download en
import spacy
from spacy_langdetect import LanguageDetector

nlp = spacy.load("en")
nlp.add_pipe(LanguageDetector(), name="language_detector", last=True)
text = "This is English text Er lebt mit seinen Eltern und seiner Schwester in Berlin. Yo me divierto todos los días en el parque. Je m'appelle Angélica Summer, j'ai 12 ans et je suis canadienne."
doc = nlp(text)
# document level language detection. Think of it like average language of document!
print(doc._.language['language'])
# sentence level language detection
for i, sent in enumerate(doc.sents):
    print(sent, sent._.language)

fr
This is English text {'language': 'en', 'score': 0.9999983198032172}
Er lebt mit seinen {'language': 'de', 'score': 0.9999969687866875}
Eltern und seiner Schwester in Berlin. {'language': 'de', 'score': 0.9999956876999901}
Yo me {'language': 'id', 'score': 0.9999947568149832}
divierto todos los días en el parque. {'language': 'es', 'score': 0.999996449223586}
Je m'appelle Angélica Summer, j'ai 12 ans et je suis canadienne. {'language': 'fr', 'score': 0.999997298086013}


In [34]:
predictions = []
for i in range(1000): 
    doc = nlp(x_train['Par'][i])
    predicted_lang = doc._.language['language']
    print(predicted_lang)
    predictions.append(predicted_lang)

et
en
hi
ca
th
so
nl
en
ur
nl
ru
no
fr
ta
vi
pa
pl
fa
it
zh-cn
ar
fi
nl
id
fa
id
nl
ar
zh-cn
ja
es
es
es
UNKNOWN
tr
ru
tr
tl
tr
de
ru
ca
ru
de
ur
ru
ja
ru
UNKNOWN
id
ru
ko
da
pt
fr
so
tl
ne
no
hr
mr
UNKNOWN
UNKNOWN
hr
sl
hr
id
zh-cn
ko
ru
ro
es
so
bg
th
en
sw
id
tr
sw
id
nl
ru
ru
ne
sw
et
ro
pt
vi
en
bn
hr
hi
mr
fr
ca
ca
it
UNKNOWN
sw
ta
tl
hu
pl
ru
cy
fr
id
vi
ru
it
uk
tr
cy
no
tl
he
et
es
bg
ru
ru
no
tr
fr
fr
mr
de
ru
ne
no
tr
tr
et
ko
de
ru
de
it
hr
pl
no
no
no
vi
ru
de
cy
mk
fa
de
te
sw
nl
mr
sl
fa
uk
es
pl
ca
no
nl
bg
UNKNOWN
id
so
it
fa
nl
nl
fr
ca
it
fr
ru
ru
af
ro
fa
ar
et
fi
fa
nl
tr
en
fa
fr
fa
es
ca
gu
en
ru
it
fr
ro
hi
no
ca
sw
hi
fr
mk
de
ru
lv
et
lv
cs
lv
vi
id
ru
id
id
es
ru
da
id
ru
it
ru
ja
et
lt
mr
af
id
ar
tl
lt
ru
it
fr
sq
so
id
sk
es
it
it
id
vi
pl
fa
it
so
de
uk
sw
it
es
lt
hr
fr
hi
tr
tr
es
pl
hi
ru
sv
tl
en
vi
en
cy
bg
de
fr
it
ro
zh-cn
bn
lt
pl
ru
es
it
lt
id
kn
fi
ru
he
id
bn
cy
ru
ne
tr
et
ru
sw
pa
it
UNKNOWN
en
pt
it
tl
UNKNOWN
en
kn
id
en
cs
et
ur
it
et
ru


In [37]:
#!pip install pycountry

Collecting pycountry
  Downloading pycountry-19.8.18.tar.gz (10.0 MB)
[K     |████████████████████████████████| 10.0 MB 10.1 MB/s eta 0:00:01
[?25hBuilding wheels for collected packages: pycountry
  Building wheel for pycountry (setup.py) ... [?25ldone
[?25h  Created wheel for pycountry: filename=pycountry-19.8.18-py2.py3-none-any.whl size=10627360 sha256=b5f22b0bb4c3f35ad8a8733fb91750de15b599bbf3ee68f62260a5b25b7183af
  Stored in directory: /Users/lu_qxl/Library/Caches/pip/wheels/5e/d3/aa/d4d009d8d8489715d33932f44fc2582dfc216e0fde3a5016ee
Successfully built pycountry
Installing collected packages: pycountry
Successfully installed pycountry-19.8.18


In [39]:
#!pip install pycountry
from pycountry import languages


French


In [56]:
names = []
for i in range(len(predictions)):
    if predictions[i] == 'UNKNOWN':
        names.append('nan')
    elif predictions[i] == 'zh-cn':
        names.append(languages.get(alpha_2= 'zh').name)
    elif predictions[i] == 'zh-tw':
        names.append(languages.get(alpha_2= 'zh').name)
    else:
        lang_name = languages.get(alpha_2=predictions[i]).name
        print(lang_name)
        names.append(lang_name)

Estonian
English
Hindi
Catalan
Thai
Somali
Dutch
English
Urdu
Dutch
Russian
Norwegian
French
Tamil
Vietnamese
Panjabi
Polish
Persian
Italian
Arabic
Finnish
Dutch
Indonesian
Persian
Indonesian
Dutch
Arabic
Japanese
Spanish
Spanish
Spanish
Turkish
Russian
Turkish
Tagalog
Turkish
German
Russian
Catalan
Russian
German
Urdu
Russian
Japanese
Russian
Indonesian
Russian
Korean
Danish
Portuguese
French
Somali
Tagalog
Nepali (macrolanguage)
Norwegian
Croatian
Marathi
Croatian
Slovenian
Croatian
Indonesian
Korean
Russian
Romanian
Spanish
Somali
Bulgarian
Thai
English
Swahili (macrolanguage)
Indonesian
Turkish
Swahili (macrolanguage)
Indonesian
Dutch
Russian
Russian
Nepali (macrolanguage)
Swahili (macrolanguage)
Estonian
Romanian
Portuguese
Vietnamese
English
Bengali
Croatian
Hindi
Marathi
French
Catalan
Catalan
Italian
Swahili (macrolanguage)
Tamil
Tagalog
Hungarian
Polish
Russian
Welsh
French
Indonesian
Vietnamese
Russian
Italian
Ukrainian
Turkish
Welsh
Norwegian
Tagalog
Hebrew
Estonian
Spanish


In [43]:
predictions[33]

'UNKNOWN'

In [59]:
languages.get(alpha_2='zh').name

'Chinese'

In [71]:
train_list

['Estonian',
 'Swedish',
 'Maithili',
 'Occitan',
 'Thai',
 'Oromo',
 'Limburgan',
 'Gujarati',
 'Western Panjabi',
 'Zeeuws',
 'Karachay-Balkar',
 'Haitian Creole',
 'Picard',
 'Tamil',
 'Vietnamese',
 'Panjabi',
 'Silesian',
 'Central Kurdish',
 'Friulian',
 'Wu Chinese',
 'Egyptian Arabic',
 'Tongan',
 'Basque',
 'Banyumasan',
 'Gilaki',
 'Banyumasan',
 'Dutch',
 'Egyptian Arabic',
 'Tibetan',
 'Japanese',
 'Aragonese',
 'Sardinian',
 'Extremaduran',
 'Sinhala',
 'Kurdish',
 'Chechen',
 'Turkmen',
 'Pangasinan',
 'Turkish',
 'Alemannic German',
 'Komi-Permyak',
 'Sardinian',
 'Karachay-Balkar',
 'Latin',
 'Urdu',
 'Tatar',
 'Japanese',
 'Buryat',
 'Tibetan',
 'Indonesian',
 'Kirghiz',
 'Cantonese',
 'Danish',
 'Portuguese',
 'French',
 'Oromo',
 'Pangasinan',
 'Oriya',
 'Bokmål',
 'Lojban',
 'Konkani',
 'Amharic',
 'Central Khmer',
 'Serbo-Croatian',
 'Slovene',
 'Bosnian',
 'Tetum',
 'Standard Chinese',
 'Korean',
 'Yakut',
 'Aromanian',
 'Asturian',
 'Wolof',
 'Bulgarian',
 'Thai'

In [72]:
acc = accuracy_score(train_list, names)
print("Accuracy: ", acc)

Accuracy:  0.213


In [73]:
## FastText 
#!pip install fasttext
import fasttext
model = fasttext.load_model('lid.176.ftz')
print(model.predict(x_train['Par'][0]))  # top 2 matching languages

# predictions = model.predict(sentences)
# print(predictions)



(('__label__et',), array([0.87263]))


In [98]:
ft_predict = []
for i in range(1000): 
    sentences = x_train['Par'][i]
    predicted_lang = model.predict(sentences)
    ft_predict.append(predicted_lang[0][0][9:])

In [99]:
ft_predict

['et',
 'en',
 'hi',
 'oc',
 'th',
 'fi',
 'li',
 'en',
 'pnb',
 'nl',
 'ru',
 'tl',
 'fr',
 'ta',
 'vi',
 'pa',
 'pl',
 'ckb',
 'it',
 'zh',
 'ar',
 'br',
 'eu',
 'id',
 'ar',
 'id',
 'nl',
 'ar',
 'bo',
 'ja',
 'es',
 'ca',
 'es',
 'si',
 'ku',
 'ce',
 'tk',
 'eo',
 'tr',
 'als',
 'ru',
 'fr',
 'krc',
 'de',
 'ur',
 'tt',
 'ja',
 'mn',
 'bo',
 'id',
 'ru',
 'zh',
 'da',
 'pt',
 'fr',
 'fi',
 'la',
 'or',
 'no',
 'jbo',
 'mr',
 'am',
 'km',
 'hr',
 'sl',
 'bs',
 'cy',
 'zh',
 'ko',
 'sah',
 'it',
 'es',
 'nl',
 'bg',
 'th',
 'gd',
 'en',
 'ms',
 'tr',
 'eo',
 'su',
 'br',
 'ky',
 'mn',
 'ne',
 'en',
 'et',
 'jbo',
 'pt',
 'ro',
 'en',
 'as',
 'hu',
 'hi',
 'mr',
 'fr',
 'es',
 'es',
 'lmo',
 'my',
 'su',
 'ta',
 'ilo',
 'cs',
 'pl',
 'ru',
 'ga',
 'ca',
 'it',
 'hr',
 'krc',
 'scn',
 'be',
 'ku',
 'cy',
 'is',
 'mg',
 'yi',
 'cs',
 'es',
 'bg',
 'ru',
 'ru',
 'is',
 'tr',
 'fr',
 'fr',
 'mr',
 'nl',
 'tt',
 'sa',
 'tl',
 'tr',
 'az',
 'et',
 'ko',
 'de',
 'ru',
 'de',
 'it',
 'sh',
 '

In [129]:
ft_names = []
for i in range(len(ft_predict)):
    if len(ft_predict[i]) == 3:
        ft_names.append('nan')
    else:
        lang_name = languages.get(alpha_2=ft_predict[i]).name
        if '(' in lang_name:
            ft_names.append(re.sub("[(\[].*?[\)]", "", lang_name)[:-1])
        else:
            ft_names.append(lang_name)
        

In [130]:
ft_names

['Estonian',
 'English',
 'Hindi',
 'Occitan',
 'Thai',
 'Finnish',
 'Limburgan',
 'English',
 'nan',
 'Dutch',
 'Russian',
 'Tagalog',
 'French',
 'Tamil',
 'Vietnamese',
 'Panjabi',
 'Polish',
 'nan',
 'Italian',
 'Chinese',
 'Arabic',
 'Breton',
 'Basque',
 'Indonesian',
 'Arabic',
 'Indonesian',
 'Dutch',
 'Arabic',
 'Tibetan',
 'Japanese',
 'Spanish',
 'Catalan',
 'Spanish',
 'Sinhala',
 'Kurdish',
 'Chechen',
 'Turkmen',
 'Esperanto',
 'Turkish',
 'nan',
 'Russian',
 'French',
 'nan',
 'German',
 'Urdu',
 'Tatar',
 'Japanese',
 'Mongolian',
 'Tibetan',
 'Indonesian',
 'Russian',
 'Chinese',
 'Danish',
 'Portuguese',
 'French',
 'Finnish',
 'Latin',
 'Oriya',
 'Norwegian',
 'nan',
 'Marathi',
 'Amharic',
 'Central Khmer',
 'Croatian',
 'Slovenian',
 'Bosnian',
 'Welsh',
 'Chinese',
 'Korean',
 'nan',
 'Italian',
 'Spanish',
 'Dutch',
 'Bulgarian',
 'Thai',
 'Scottish Gaelic',
 'English',
 'Malay',
 'Turkish',
 'Esperanto',
 'Sundanese',
 'Breton',
 'Kirghiz',
 'Mongolian',
 'Nepal

In [131]:
acc = accuracy_score(train_list, ft_names)
print("Accuracy: ", acc)

Accuracy:  0.424


In [112]:
ft_names[3]

'Occitan (post 1500)'

In [124]:
re.sub("[(\[].*?[\)]", "", ft_names[3])[:-1]

'Occita'

In [125]:
'(' in ft_names[3]

False

In [137]:
acc = accuracy_score(y_tr_new_1k_2, ft_predict)
print("Accuracy: ", acc)

Accuracy:  0.519


In [151]:
langs = ["Gujarati", "Malayalam", "Albanian", "Punjabi", "Telugu", "Arabic", "Bulgarian", "Bengali", "Macedonian", 
      "Tamil", "Tagalog", "Chinese", "Croatian", "Czech", "Danish", "Dutch", "English", "Estonian", "Finnish", 
      "French", "German", "Greek", "Hebrew", "Hindi", "Hungarian", "Indonesian", "Italian", "Japanese", "Korean", 
      "Latvian", "Lithuanian", "Norwegian", "Persian", "Polish", "Portuguese", "Romanian", "Russian", "Slovak", 
      "Spanish", "Swedish", "Thai", "Turkish", "Ukrainian", "Urdu", "Vietnamese"]

In [152]:
len(langs)

45

In [156]:
for name in labels_df['English']:
    if  name == 'Wu Chinese' or 'Cantonese' or 'Standard Chinese' or 'Hakka Chinese' or 'Literary Chinese':
        labels_df['English'][i] == 'Chinese'
    elif name == 'modern Greek ':
        labels_df['English'][i] == 'Greek'
    elif name == 'Norwegian Nynorsk':
        labels_df['English'][i] == 'Norwegian'

KeyError: 999

In [160]:
labels_df['English'].replace({"Wu Chinese": "Chinese", "Cantonese": "Chinese", "Standard Chinese": 'Chinese',
                              'Hakka Chinese': 'Chinese', 'Literary Chinese': 'Chinese', 'Modern Greek': 'Greek',
                             'Norwegian Nynorsk': 'Norwegian'}, inplace=True)
    

In [161]:
lang_labels = list(labels_df[labels_df['English'].isin(langs)]['Label'])
len(lang_labels)

48

In [162]:
lang_names = (list(labels_df[labels_df['English'].isin(langs)]['English']))
lang_names

['Arabic',
 'Bengali',
 'Bulgarian',
 'Czech',
 'Danish',
 'German',
 'Greek',
 'English',
 'Estonian',
 'Persian',
 'Finnish',
 'French',
 'Gujarati',
 'Chinese',
 'Hebrew',
 'Hindi',
 'Croatian',
 'Hungarian',
 'Indonesian',
 'Italian',
 'Japanese',
 'Korean',
 'Latvian',
 'Lithuanian',
 'Chinese',
 'Malayalam',
 'Macedonian',
 'Dutch',
 'Norwegian',
 'Polish',
 'Portuguese',
 'Romanian',
 'Russian',
 'Slovak',
 'Spanish',
 'Albanian',
 'Swedish',
 'Tamil',
 'Telugu',
 'Tagalog',
 'Thai',
 'Turkish',
 'Ukrainian',
 'Urdu',
 'Vietnamese',
 'Chinese',
 'Chinese',
 'Chinese']

In [None]:
## Greek - modern Greek 
## Chinese
## Norwegian - Norwegian Nynorsk
## Slovenian exclude 

In [179]:
def read_file(x_file, y_file):
    y_df = pd.read_csv(y_file, header=None)
    # y_df has only one column; name it 'Label'
    y_df.columns = ['Label']

    # Read contents of 'x_file' into a list of strings
    with open(x_file, encoding='utf8') as f:
        x_pars = f.readlines()
    
    # Remove all whitespace characters (such as '\n') from the beginning and the end of the strings
    x_pars = [t.strip() for t in x_pars]
    # Convert the list into a dataframe, with one column: 'Par'
    x_df = pd.DataFrame(x_pars, columns=['Par']) 
    # Just keep paragraphs of languages in lang_labels (and remove other languages)
    x_df = x_df[y_df['Label'].isin(lang_labels)]
    # Just keep languages in lang_labels
    y_df = y_df[y_df['Label'].isin(lang_labels)]

    return (x_df, y_df)

In [180]:
x_train, y_train = read_file(x_train_file, y_train_file)

In [183]:
x_train['Par'][2]

KeyError: 2

In [169]:
y_tr_1k = y_train

In [199]:
y_tr_new = y_train.merge(labels_df, on = "Label", how = "left")
y_tr_new_1k = y_tr_new['English']

In [200]:
x_train.reset_index()

Unnamed: 0,index,Par
0,0,Klement Gottwaldi surnukeha palsameeriti ning ...
1,1,"Sebes, Joseph; Pereira Thomas (1961) (på eng)...."
2,4,ถนนเจริญกรุง (อักษรโรมัน: Thanon Charoen Krung...
3,7,He was a economics graduate from Elphinstone C...
4,13,விசாகப்பட்டினம் தமிழ்ச்சங்கத்தை இந்துப் பத்திர...
5,14,Bùi Tiến Dũng (sinh năm 1959 tại huyện Ứng Hòa...
6,19,UNC有得一只历史悠久个'诚信守则'。渠是由学堂个诚信法庭（Honor Court）来执行个...
7,26,De spons behoort tot het geslacht Haliclona en...
8,29,エノが行きがかりでバスに乗ってしまい、気分が悪くなった際に助けるが、今すぐバスを降りたいと運...
9,38,Tsutinalar (İngilizce: Tsuut'ina): Kanada'da A...


In [193]:
x_train_reindex = x_train.reset_index()

In [201]:
x_train_reindex['Par'][3]

'He was a economics graduate from Elphinstone College, Mumbai. He was an industrialist in plastics business. He served as a president of Gujarat Chamber of Commerce and Industry in 1990s.'

In [202]:
y_tr_new_1k

0          Estonian
1           Swedish
2              Thai
3          Gujarati
4             Tamil
5        Vietnamese
6           Chinese
7             Dutch
8          Japanese
9           Turkish
10             Urdu
11         Japanese
12       Indonesian
13          Chinese
14           Danish
15       Portuguese
16           French
17          Chinese
18           Korean
19        Bulgarian
20             Thai
21         Estonian
22       Portuguese
23       Portuguese
24            Hindi
25            Tamil
26          Spanish
27        Bulgarian
28           French
29           French
            ...    
23970        Danish
23971        Hebrew
23972    Portuguese
23973     Ukrainian
23974       Turkish
23975         Dutch
23976         Tamil
23977       Tagalog
23978    Lithuanian
23979    Lithuanian
23980     Malayalam
23981     Hungarian
23982      Croatian
23983       Chinese
23984        French
23985     Hungarian
23986        Korean
23987        Hebrew
23988        German


In [203]:
ft_predict = []
for i in range(len(x_train_reindex)): 
    sentences = x_train_reindex['Par'][i]
    predicted_lang = model.predict(sentences)
    print(predicted_lang)
    ft_predict.append(predicted_lang[0][0][9:])

(('__label__et',), array([0.87263]))
(('__label__en',), array([0.26207453]))
(('__label__th',), array([0.99940717]))
(('__label__en',), array([0.99700701]))
(('__label__ta',), array([0.99972832]))
(('__label__vi',), array([0.99803317]))
(('__label__zh',), array([0.99906862]))
(('__label__nl',), array([0.99554825]))
(('__label__ja',), array([1.00004518]))
(('__label__tr',), array([0.97863638]))
(('__label__ur',), array([0.94376165]))
(('__label__ja',), array([1.00004745]))
(('__label__id',), array([0.79528564]))
(('__label__zh',), array([0.99787217]))
(('__label__da',), array([0.94664299]))
(('__label__pt',), array([0.855272]))
(('__label__fr',), array([0.86295325]))
(('__label__zh',), array([0.99940497]))
(('__label__ko',), array([1.00006831]))
(('__label__bg',), array([0.9240793]))
(('__label__th',), array([0.99975604]))
(('__label__et',), array([0.95673162]))
(('__label__pt',), array([0.96831256]))
(('__label__en',), array([0.73308909]))
(('__label__hi',), array([0.99295318]))
(('__l

(('__label__bg',), array([0.90219492]))
(('__label__id',), array([0.75737214]))
(('__label__te',), array([0.99983394]))
(('__label__de',), array([0.94113296]))
(('__label__de',), array([0.97604126]))
(('__label__nl',), array([0.90957081]))
(('__label__et',), array([0.95771396]))
(('__label__fi',), array([0.99509287]))
(('__label__tr',), array([0.99096161]))
(('__label__pt',), array([0.92812365]))
(('__label__et',), array([0.98068881]))
(('__label__te',), array([0.9999519]))
(('__label__da',), array([0.86906004]))
(('__label__ar',), array([0.99350822]))
(('__label__uk',), array([0.98528552]))
(('__label__tr',), array([0.98660553]))
(('__label__fr',), array([0.99334455]))
(('__label__ur',), array([0.96681964]))
(('__label__th',), array([0.99895221]))
(('__label__tr',), array([0.52140498]))
(('__label__ro',), array([0.99684596]))
(('__label__sk',), array([0.81911719]))
(('__label__ru',), array([0.98551244]))
(('__label__fr',), array([0.97166002]))
(('__label__nn',), array([0.78859794]))
(

(('__label__ar',), array([0.99205393]))
(('__label__cs',), array([0.97821337]))
(('__label__zh',), array([0.9997161]))
(('__label__mk',), array([0.97476631]))
(('__label__tr',), array([0.98479104]))
(('__label__fr',), array([0.84551346]))
(('__label__sq',), array([0.99128205]))
(('__label__nn',), array([0.58420217]))
(('__label__he',), array([0.9997384]))
(('__label__sq',), array([0.99786222]))
(('__label__da',), array([0.62362719]))
(('__label__bg',), array([0.86897308]))
(('__label__fr',), array([0.991458]))
(('__label__sv',), array([0.99913698]))
(('__label__pt',), array([0.590114]))
(('__label__ml',), array([0.99946874]))
(('__label__fa',), array([0.98483282]))
(('__label__sv',), array([0.99787319]))
(('__label__pl',), array([0.99549741]))
(('__label__da',), array([0.94647157]))
(('__label__tr',), array([0.92157036]))
(('__label__zh',), array([0.94384891]))
(('__label__el',), array([0.99644053]))
(('__label__fa',), array([0.64135307]))
(('__label__en',), array([0.94803935]))
(('__l

(('__label__ro',), array([0.96910948]))
(('__label__cs',), array([0.98968804]))
(('__label__bg',), array([0.94413233]))
(('__label__fa',), array([0.98782873]))
(('__label__hi',), array([0.95872062]))
(('__label__es',), array([0.97977722]))
(('__label__sq',), array([0.96977013]))
(('__label__te',), array([0.99967945]))
(('__label__bg',), array([0.88458478]))
(('__label__zh',), array([0.55187869]))
(('__label__ta',), array([0.99982798]))
(('__label__vi',), array([0.9981637]))
(('__label__zh',), array([0.9954896]))
(('__label__zh',), array([0.73795968]))
(('__label__cs',), array([0.97814661]))
(('__label__fa',), array([0.98945522]))
(('__label__tl',), array([0.92131835]))
(('__label__pt',), array([0.95397067]))
(('__label__es',), array([0.97139215]))
(('__label__zh',), array([0.99990386]))
(('__label__gu',), array([0.99921763]))
(('__label__nl',), array([0.96566671]))
(('__label__en',), array([0.98569655]))
(('__label__vi',), array([0.9991461]))
(('__label__pt',), array([0.9230634]))
(('_

(('__label__id',), array([0.77808744]))
(('__label__hu',), array([0.98534113]))
(('__label__sv',), array([0.99938929]))
(('__label__it',), array([0.99422443]))
(('__label__sv',), array([0.99209911]))
(('__label__gu',), array([0.99882215]))
(('__label__th',), array([0.99930662]))
(('__label__de',), array([0.99387211]))
(('__label__el',), array([0.99724871]))
(('__label__lt',), array([0.95825654]))
(('__label__bg',), array([0.9354499]))
(('__label__fr',), array([0.98493552]))
(('__label__ta',), array([0.99990112]))
(('__label__ar',), array([0.97284323]))
(('__label__sq',), array([0.97982997]))
(('__label__ro',), array([0.80237043]))
(('__label__hi',), array([0.98148662]))
(('__label__bn',), array([0.9907068]))
(('__label__id',), array([0.89358383]))
(('__label__zh',), array([0.97950059]))
(('__label__fa',), array([0.99190742]))
(('__label__id',), array([0.4725771]))
(('__label__uk',), array([0.94482934]))
(('__label__mk',), array([0.94600189]))
(('__label__de',), array([0.99797088]))
(('

(('__label__bn',), array([0.98588109]))
(('__label__vi',), array([0.99593806]))
(('__label__it',), array([0.99844152]))
(('__label__te',), array([0.99991679]))
(('__label__cs',), array([0.98728353]))
(('__label__da',), array([0.90800387]))
(('__label__es',), array([0.98899871]))
(('__label__ta',), array([0.99985075]))
(('__label__ml',), array([0.99867046]))
(('__label__it',), array([0.99177897]))
(('__label__nl',), array([0.99970669]))
(('__label__ru',), array([0.9910568]))
(('__label__id',), array([0.87862593]))
(('__label__pl',), array([0.97838849]))
(('__label__te',), array([0.99993736]))
(('__label__ja',), array([1.00003183]))
(('__label__nl',), array([0.98229665]))
(('__label__ml',), array([0.99927586]))
(('__label__lt',), array([0.98203576]))
(('__label__es',), array([0.92172593]))
(('__label__sh',), array([0.32652351]))
(('__label__ja',), array([1.00004661]))
(('__label__hr',), array([0.47344735]))
(('__label__gu',), array([0.99977142]))
(('__label__et',), array([0.82515585]))
(

(('__label__vi',), array([0.23383583]))
(('__label__nl',), array([0.99978429]))
(('__label__el',), array([0.99798644]))
(('__label__mk',), array([0.92693686]))
(('__label__es',), array([0.97893602]))
(('__label__hi',), array([0.98852652]))
(('__label__zh',), array([0.97237378]))
(('__label__et',), array([0.95530564]))
(('__label__bg',), array([0.68369269]))
(('__label__sv',), array([0.99942142]))
(('__label__zh',), array([0.70575005]))
(('__label__he',), array([0.99994111]))
(('__label__en',), array([0.97876346]))
(('__label__hi',), array([0.98770595]))
(('__label__zh',), array([0.86946595]))
(('__label__hi',), array([0.99019444]))
(('__label__ar',), array([0.99699557]))
(('__label__it',), array([0.47432745]))
(('__label__nl',), array([0.9975763]))
(('__label__lt',), array([0.9896751]))
(('__label__nn',), array([0.8567735]))
(('__label__vi',), array([0.99877048]))
(('__label__en',), array([0.92771202]))
(('__label__sk',), array([0.89345038]))
(('__label__bn',), array([0.99412018]))
(('

(('__label__tr',), array([0.98878241]))
(('__label__zh',), array([0.99886173]))
(('__label__uk',), array([0.99490142]))
(('__label__zh',), array([0.98353159]))
(('__label__te',), array([0.99991012]))
(('__label__gu',), array([0.99929678]))
(('__label__zh',), array([0.99718297]))
(('__label__zh',), array([0.9525882]))
(('__label__ur',), array([0.81886047]))
(('__label__ru',), array([0.97937459]))
(('__label__es',), array([0.98513526]))
(('__label__mk',), array([0.97051245]))
(('__label__fr',), array([0.997558]))
(('__label__uk',), array([0.99912846]))
(('__label__ta',), array([0.99992418]))
(('__label__es',), array([0.96266031]))
(('__label__it',), array([0.99205196]))
(('__label__bn',), array([0.9957062]))
(('__label__ml',), array([0.99916738]))
(('__label__lv',), array([0.92655414]))
(('__label__zh',), array([0.88424927]))
(('__label__ml',), array([0.99938053]))
(('__label__ta',), array([0.99970722]))
(('__label__ml',), array([0.99788427]))
(('__label__nl',), array([0.95811194]))
(('_

(('__label__bg',), array([0.92614698]))
(('__label__te',), array([0.99987727]))
(('__label__sv',), array([0.99928796]))
(('__label__zh',), array([0.99424702]))
(('__label__gu',), array([0.9999795]))
(('__label__en',), array([0.99338764]))
(('__label__ro',), array([0.99878079]))
(('__label__ru',), array([0.35170537]))
(('__label__it',), array([0.99080431]))
(('__label__sk',), array([0.95816851]))
(('__label__ja',), array([1.00004733]))
(('__label__lt',), array([0.76872784]))
(('__label__mk',), array([0.96140814]))
(('__label__et',), array([0.96459717]))
(('__label__mk',), array([0.87237161]))
(('__label__et',), array([0.9324109]))
(('__label__zh',), array([0.99283081]))
(('__label__id',), array([0.81423515]))
(('__label__ru',), array([0.98251933]))
(('__label__he',), array([0.99988133]))
(('__label__de',), array([0.99451989]))
(('__label__he',), array([0.99946541]))
(('__label__tl',), array([0.95431125]))
(('__label__ro',), array([0.99727952]))
(('__label__nl',), array([0.99480742]))
((

(('__label__th',), array([0.99984038]))
(('__label__uk',), array([0.99558514]))
(('__label__ja',), array([1.0000236]))
(('__label__uk',), array([0.98858207]))
(('__label__zh',), array([0.98524517]))
(('__label__bg',), array([0.83208466]))
(('__label__sq',), array([0.99864829]))
(('__label__lt',), array([0.97716331]))
(('__label__bn',), array([0.99071413]))
(('__label__he',), array([0.99922299]))
(('__label__fi',), array([0.98746687]))
(('__label__ko',), array([1.00006735]))
(('__label__it',), array([0.99402779]))
(('__label__sh',), array([0.35020167]))
(('__label__mk',), array([0.96263248]))
(('__label__fi',), array([0.96422935]))
(('__label__cs',), array([0.97394854]))
(('__label__zh',), array([0.75427032]))
(('__label__es',), array([0.94050366]))
(('__label__hu',), array([0.99092865]))
(('__label__te',), array([0.99990559]))
(('__label__sv',), array([0.99790001]))
(('__label__sq',), array([0.99800402]))
(('__label__hi',), array([0.98131418]))
(('__label__ru',), array([0.98582596]))
(

(('__label__hu',), array([0.94044012]))
(('__label__lv',), array([0.97203416]))
(('__label__hi',), array([0.98327118]))
(('__label__te',), array([0.99998569]))
(('__label__he',), array([0.99957651]))
(('__label__gu',), array([0.99968201]))
(('__label__et',), array([0.81362045]))
(('__label__zh',), array([0.89828479]))
(('__label__vi',), array([0.99906778]))
(('__label__sk',), array([0.89609057]))
(('__label__hr',), array([0.52457613]))
(('__label__ja',), array([1.00004816]))
(('__label__el',), array([0.99790424]))
(('__label__hi',), array([0.98754901]))
(('__label__lv',), array([0.95658559]))
(('__label__lt',), array([0.85461676]))
(('__label__vi',), array([0.21224946]))
(('__label__he',), array([0.99979442]))
(('__label__pl',), array([0.99189162]))
(('__label__da',), array([0.88159907]))
(('__label__ko',), array([1.00006962]))
(('__label__uk',), array([0.99330497]))
(('__label__ur',), array([0.97138685]))
(('__label__fi',), array([0.97844428]))
(('__label__nl',), array([0.97250712]))


(('__label__he',), array([0.99988878]))
(('__label__nn',), array([0.82911915]))
(('__label__sq',), array([0.99567145]))
(('__label__en',), array([0.95936394]))
(('__label__zh',), array([0.96852404]))
(('__label__ur',), array([0.97688717]))
(('__label__ko',), array([1.00006914]))
(('__label__zh',), array([0.99970973]))
(('__label__ru',), array([0.98674834]))
(('__label__bg',), array([0.97345835]))
(('__label__hu',), array([0.99092662]))
(('__label__ta',), array([0.99990261]))
(('__label__cs',), array([0.95519453]))
(('__label__hi',), array([0.98078829]))
(('__label__sq',), array([0.99267614]))
(('__label__zh',), array([0.99837506]))
(('__label__pt',), array([0.97977132]))
(('__label__ur',), array([0.74566448]))
(('__label__vi',), array([0.99866199]))
(('__label__mk',), array([0.97624832]))
(('__label__uk',), array([0.99918884]))
(('__label__el',), array([0.99779749]))
(('__label__nl',), array([0.99538654]))
(('__label__ar',), array([0.99081194]))
(('__label__mk',), array([0.97086787]))


(('__label__sk',), array([0.89363438]))
(('__label__mk',), array([0.97254425]))
(('__label__he',), array([0.99949986]))
(('__label__ta',), array([0.99991089]))
(('__label__ml',), array([0.99824452]))
(('__label__zh',), array([0.99692166]))
(('__label__de',), array([0.99433756]))
(('__label__sk',), array([0.82240248]))
(('__label__zh',), array([0.99335212]))
(('__label__zh',), array([0.98383141]))
(('__label__lt',), array([0.99158919]))
(('__label__zh',), array([0.99930423]))
(('__label__pms',), array([0.11741644]))
(('__label__hu',), array([0.98382032]))
(('__label__ar',), array([0.98498428]))
(('__label__sk',), array([0.86651063]))
(('__label__en',), array([0.91235167]))
(('__label__bn',), array([0.6030789]))
(('__label__tr',), array([0.97729087]))
(('__label__ml',), array([0.99929732]))
(('__label__fr',), array([0.99019271]))
(('__label__ru',), array([0.98144221]))
(('__label__ro',), array([0.99870652]))
(('__label__ur',), array([0.83176315]))
(('__label__te',), array([0.99963278]))


(('__label__zh',), array([0.99961936]))
(('__label__uk',), array([0.64909875]))
(('__label__th',), array([0.99978817]))
(('__label__fi',), array([0.99518359]))
(('__label__bn',), array([0.99634856]))
(('__label__lt',), array([0.93422216]))
(('__label__fr',), array([0.99448413]))
(('__label__ta',), array([0.99971658]))
(('__label__gu',), array([0.99965286]))
(('__label__uk',), array([0.99471104]))
(('__label__el',), array([0.99672359]))
(('__label__zh',), array([0.86067063]))
(('__label__zh',), array([0.79922825]))
(('__label__bg',), array([0.9712922]))
(('__label__hu',), array([0.98758554]))
(('__label__tl',), array([0.95921898]))
(('__label__zh',), array([0.98888332]))
(('__label__zh',), array([0.88173217]))
(('__label__zh',), array([0.72794187]))
(('__label__zh',), array([0.91183686]))
(('__label__en',), array([0.89688802]))
(('__label__ja',), array([1.00002158]))
(('__label__fr',), array([0.99079937]))
(('__label__pt',), array([0.97135079]))
(('__label__fa',), array([0.99492657]))
(

(('__label__cs',), array([0.94366884]))
(('__label__sk',), array([0.74635243]))
(('__label__sv',), array([0.99792796]))
(('__label__ar',), array([0.99038899]))
(('__label__fa',), array([0.98968685]))
(('__label__da',), array([0.68989748]))
(('__label__zh',), array([0.99992114]))
(('__label__ru',), array([0.98022193]))
(('__label__zh',), array([0.99550134]))
(('__label__sv',), array([0.99474847]))
(('__label__da',), array([0.82848209]))
(('__label__ro',), array([0.99039173]))
(('__label__tl',), array([0.69956309]))
(('__label__tr',), array([0.99116784]))
(('__label__it',), array([0.98699462]))
(('__label__ro',), array([0.98635685]))
(('__label__zh',), array([0.99397612]))
(('__label__en',), array([0.90658659]))
(('__label__mk',), array([0.97633636]))
(('__label__pl',), array([0.99534726]))
(('__label__fa',), array([0.9815554]))
(('__label__sv',), array([0.99821025]))
(('__label__vi',), array([0.99873406]))
(('__label__ko',), array([1.00006974]))
(('__label__lv',), array([0.92663014]))
(

(('__label__nn',), array([0.80686629]))
(('__label__fr',), array([0.97722393]))
(('__label__tr',), array([0.99386537]))
(('__label__fr',), array([0.98796004]))
(('__label__sk',), array([0.51381546]))
(('__label__th',), array([0.99965745]))
(('__label__zh',), array([0.99862856]))
(('__label__ml',), array([0.99937117]))
(('__label__fi',), array([0.98717922]))
(('__label__de',), array([0.95666617]))
(('__label__cs',), array([0.99053133]))
(('__label__mk',), array([0.8319369]))
(('__label__nn',), array([0.96425807]))
(('__label__cs',), array([0.93445826]))
(('__label__sk',), array([0.90766227]))
(('__label__pl',), array([0.93687218]))
(('__label__el',), array([0.9975788]))
(('__label__it',), array([0.97812957]))
(('__label__ml',), array([0.99883676]))
(('__label__ko',), array([1.00006402]))
(('__label__el',), array([0.99760425]))
(('__label__es',), array([0.99461746]))
(('__label__ta',), array([0.99957865]))
(('__label__ar',), array([0.9975087]))
(('__label__nn',), array([0.69559896]))
(('

(('__label__ko',), array([1.00006878]))
(('__label__vi',), array([0.2227264]))
(('__label__hr',), array([0.65654308]))
(('__label__it',), array([0.99392915]))
(('__label__fr',), array([0.96536142]))
(('__label__hr',), array([0.4965682]))
(('__label__et',), array([0.92644656]))
(('__label__bn',), array([0.9922334]))
(('__label__mk',), array([0.95340157]))
(('__label__nl',), array([0.98729044]))
(('__label__sk',), array([0.84099555]))
(('__label__ta',), array([0.99986982]))
(('__label__nl',), array([0.99969089]))
(('__label__en',), array([0.94165802]))
(('__label__lt',), array([0.99646866]))
(('__label__bg',), array([0.9726367]))
(('__label__uk',), array([0.99024075]))
(('__label__sq',), array([0.99568665]))
(('__label__zh',), array([0.99994493]))
(('__label__ur',), array([0.91120839]))
(('__label__cs',), array([0.99071884]))
(('__label__el',), array([0.99709249]))
(('__label__tl',), array([0.8509475]))
(('__label__vi',), array([0.17510442]))
(('__label__vi',), array([0.99790686]))
(('__

(('__label__ur',), array([0.93944484]))
(('__label__cs',), array([0.98315132]))
(('__label__et',), array([0.82647932]))
(('__label__ar',), array([0.99229252]))
(('__label__bg',), array([0.9213137]))
(('__label__fi',), array([0.97745472]))
(('__label__vi',), array([0.99771732]))
(('__label__ko',), array([1.00006962]))
(('__label__zh',), array([0.95101738]))
(('__label__ta',), array([0.99993914]))
(('__label__hr',), array([0.40727225]))
(('__label__bn',), array([0.993141]))
(('__label__ja',), array([1.00002897]))
(('__label__he',), array([0.99967122]))
(('__label__hr',), array([0.33569029]))
(('__label__en',), array([0.59137213]))
(('__label__sq',), array([0.97858834]))
(('__label__zh',), array([0.99977279]))
(('__label__hu',), array([0.99767184]))
(('__label__bg',), array([0.79429352]))
(('__label__ru',), array([0.98675221]))
(('__label__zh',), array([0.99360162]))
(('__label__th',), array([0.99983847]))
(('__label__hi',), array([0.98280281]))
(('__label__da',), array([0.87820858]))
(('

(('__label__zh',), array([0.99124277]))
(('__label__ko',), array([1.0000689]))
(('__label__nn',), array([0.98263282]))
(('__label__hu',), array([0.98523092]))
(('__label__ro',), array([0.99749976]))
(('__label__bn',), array([0.99420696]))
(('__label__pt',), array([0.8816272]))
(('__label__fi',), array([0.9933008]))
(('__label__zh',), array([0.99365753]))
(('__label__fr',), array([0.98964387]))
(('__label__sv',), array([0.99327773]))
(('__label__nl',), array([0.99593145]))
(('__label__zh',), array([0.99952924]))
(('__label__sk',), array([0.89069128]))
(('__label__he',), array([0.99983829]))
(('__label__ko',), array([1.00006938]))
(('__label__vi',), array([0.99858695]))
(('__label__fr',), array([0.98193961]))
(('__label__tl',), array([0.92663366]))
(('__label__th',), array([0.99980104]))
(('__label__hr',), array([0.40495905]))
(('__label__et',), array([0.78042406]))
(('__label__en',), array([0.97637051]))
(('__label__tr',), array([0.88760072]))
(('__label__en',), array([0.91529065]))
(('

(('__label__he',), array([0.99994791]))
(('__label__en',), array([0.54182082]))
(('__label__sv',), array([0.99157345]))
(('__label__fr',), array([0.99097711]))
(('__label__en',), array([0.98918712]))
(('__label__ar',), array([0.98418093]))
(('__label__id',), array([0.70356303]))
(('__label__pl',), array([0.99519759]))
(('__label__zh',), array([0.93331337]))
(('__label__sv',), array([0.98941183]))
(('__label__ar',), array([0.61299014]))
(('__label__ro',), array([0.99379188]))
(('__label__th',), array([0.9994902]))
(('__label__tl',), array([0.91625923]))
(('__label__lv',), array([0.90073019]))
(('__label__gu',), array([0.99992651]))
(('__label__gu',), array([0.99917221]))
(('__label__ja',), array([1.000049]))
(('__label__hu',), array([0.98619121]))
(('__label__cs',), array([0.99401087]))
(('__label__ru',), array([0.33664799]))
(('__label__ja',), array([1.00004756]))
(('__label__en',), array([0.9322989]))
(('__label__nn',), array([0.59955084]))
(('__label__hi',), array([0.93129712]))
(('_

(('__label__et',), array([0.86325675]))
(('__label__fi',), array([0.99789929]))
(('__label__sh',), array([0.39672267]))
(('__label__hi',), array([0.98077589]))
(('__label__th',), array([0.99942297]))
(('__label__fa',), array([0.99103332]))
(('__label__hi',), array([0.97308558]))
(('__label__sq',), array([0.99765229]))
(('__label__tl',), array([0.94850427]))
(('__label__tl',), array([0.94044715]))
(('__label__fi',), array([0.98494029]))
(('__label__da',), array([0.84511447]))
(('__label__sq',), array([0.99466139]))
(('__label__cs',), array([0.98116773]))
(('__label__bn',), array([0.99274832]))
(('__label__ko',), array([1.00006902]))
(('__label__mk',), array([0.94532377]))
(('__label__he',), array([0.99953699]))
(('__label__es',), array([0.99363112]))
(('__label__it',), array([0.98856562]))
(('__label__el',), array([0.99705058]))
(('__label__sk',), array([0.98400909]))
(('__label__fr',), array([0.98870486]))
(('__label__ko',), array([1.00006974]))
(('__label__tl',), array([0.93836755]))


(('__label__th',), array([0.99968469]))
(('__label__sv',), array([0.99804074]))
(('__label__id',), array([0.80108303]))
(('__label__zh',), array([1.00003862]))
(('__label__ur',), array([0.88111293]))
(('__label__fr',), array([0.98628515]))
(('__label__pt',), array([0.98551321]))
(('__label__te',), array([0.99998569]))
(('__label__et',), array([0.8903929]))
(('__label__it',), array([0.99083769]))
(('__label__zh',), array([0.99857152]))
(('__label__lt',), array([0.98791963]))
(('__label__zh',), array([0.94798684]))
(('__label__mk',), array([0.88171375]))
(('__label__en',), array([0.96508658]))
(('__label__ko',), array([1.00006533]))
(('__label__jv',), array([0.18413153]))
(('__label__nn',), array([0.40634421]))
(('__label__es',), array([0.99151886]))
(('__label__ar',), array([0.99116105]))
(('__label__hi',), array([0.99201119]))
(('__label__el',), array([0.99727237]))
(('__label__he',), array([0.99948519]))
(('__label__vi',), array([0.11493701]))
(('__label__pms',), array([0.16205211]))


(('__label__tr',), array([0.97840792]))
(('__label__pl',), array([0.99440545]))
(('__label__zh',), array([0.8544805]))
(('__label__fr',), array([0.97557932]))
(('__label__nl',), array([0.99692214]))
(('__label__th',), array([0.99966484]))
(('__label__tr',), array([0.97514516]))
(('__label__de',), array([0.994618]))
(('__label__fi',), array([0.96574092]))
(('__label__ur',), array([0.96969658]))
(('__label__et',), array([0.58252573]))
(('__label__fr',), array([0.9951095]))
(('__label__sv',), array([0.98679054]))
(('__label__pt',), array([0.9665091]))
(('__label__pt',), array([0.99497384]))
(('__label__de',), array([0.99801612]))
(('__label__en',), array([0.45491171]))
(('__label__lt',), array([0.97401202]))
(('__label__es',), array([0.96964091]))
(('__label__fa',), array([0.98802704]))
(('__label__fi',), array([0.98330754]))
(('__label__zh',), array([0.99692929]))
(('__label__uk',), array([0.99588865]))
(('__label__zh',), array([0.75187379]))
(('__label__te',), array([0.99981195]))
(('__

(('__label__pms',), array([0.25883985]))
(('__label__fr',), array([0.98463786]))
(('__label__hi',), array([0.95785373]))
(('__label__gu',), array([0.99909174]))
(('__label__pt',), array([0.99042171]))
(('__label__zh',), array([0.97834569]))
(('__label__zh',), array([0.99444669]))
(('__label__es',), array([0.98298031]))
(('__label__zh',), array([0.8287487]))
(('__label__el',), array([0.99779063]))
(('__label__zh',), array([0.99552226]))
(('__label__zh',), array([0.91213417]))
(('__label__mk',), array([0.9135592]))
(('__label__ar',), array([0.99350518]))
(('__label__ko',), array([1.00006998]))
(('__label__lv',), array([0.96503472]))
(('__label__hu',), array([0.98760837]))
(('__label__vi',), array([0.99837762]))
(('__label__sq',), array([0.99690396]))
(('__label__zh',), array([0.99870896]))
(('__label__ml',), array([0.99882305]))
(('__label__bn',), array([0.99550205]))
(('__label__de',), array([0.99754959]))
(('__label__sv',), array([0.99401903]))
(('__label__zh',), array([0.99320531]))
(

(('__label__he',), array([0.99982697]))
(('__label__lt',), array([0.94897574]))
(('__label__zh',), array([0.99390286]))
(('__label__id',), array([0.8463105]))
(('__label__vi',), array([0.20098104]))
(('__label__mk',), array([0.86045939]))
(('__label__fi',), array([0.98318124]))
(('__label__ja',), array([1.00002515]))
(('__label__ru',), array([0.97810912]))
(('__label__fi',), array([0.99097866]))
(('__label__pt',), array([0.97905958]))
(('__label__fr',), array([0.9879548]))
(('__label__lv',), array([0.85760498]))
(('__label__zh',), array([0.99340904]))
(('__label__fr',), array([0.99353904]))
(('__label__sq',), array([0.24564147]))
(('__label__ta',), array([0.99978137]))
(('__label__mk',), array([0.85858327]))
(('__label__ro',), array([0.97463423]))
(('__label__ur',), array([0.93681902]))
(('__label__cs',), array([0.97484559]))
(('__label__uk',), array([0.99757975]))
(('__label__fr',), array([0.97774327]))
(('__label__da',), array([0.82593876]))
(('__label__hu',), array([0.98895162]))
((

(('__label__hu',), array([0.98852611]))
(('__label__mk',), array([0.94165456]))
(('__label__fr',), array([0.99446738]))
(('__label__he',), array([0.99971521]))
(('__label__ar',), array([0.99467766]))
(('__label__zh',), array([0.99888372]))
(('__label__cs',), array([0.98871207]))
(('__label__ru',), array([0.98511195]))
(('__label__ar',), array([0.98652464]))
(('__label__fa',), array([0.98371345]))
(('__label__sk',), array([0.72691917]))
(('__label__pt',), array([0.9542188]))
(('__label__it',), array([0.97097337]))
(('__label__bn',), array([0.99452531]))
(('__label__de',), array([0.98143113]))
(('__label__ro',), array([0.99851036]))
(('__label__cs',), array([0.9855423]))
(('__label__uk',), array([0.9978438]))
(('__label__mk',), array([0.90412885]))
(('__label__tr',), array([0.99696356]))
(('__label__nl',), array([0.99906588]))
(('__label__sk',), array([0.73188788]))
(('__label__el',), array([0.99664569]))
(('__label__th',), array([0.99990445]))
(('__label__nl',), array([0.9813571]))
(('_

(('__label__zh',), array([1.00004089]))
(('__label__es',), array([0.98384553]))
(('__label__zh',), array([0.91981369]))
(('__label__sq',), array([0.99740052]))
(('__label__pl',), array([0.99561751]))
(('__label__ko',), array([1.00006926]))
(('__label__ro',), array([0.99587429]))
(('__label__ro',), array([0.97955954]))
(('__label__sk',), array([0.76883584]))
(('__label__ar',), array([0.99746782]))
(('__label__de',), array([0.99119091]))
(('__label__sk',), array([0.79181606]))
(('__label__ja',), array([1.00004041]))
(('__label__tr',), array([0.91718233]))
(('__label__zh',), array([0.99870032]))
(('__label__nl',), array([0.99466377]))
(('__label__de',), array([0.9825598]))
(('__label__lt',), array([0.97907835]))
(('__label__ur',), array([0.78158033]))
(('__label__ko',), array([1.00006759]))
(('__label__sv',), array([0.97778553]))
(('__label__ar',), array([0.98634684]))
(('__label__ro',), array([0.99820167]))
(('__label__ar',), array([0.98831576]))
(('__label__mk',), array([0.9593839]))
((

(('__label__tr',), array([0.98956025]))
(('__label__da',), array([0.77511364]))
(('__label__zh',), array([0.99783689]))
(('__label__en',), array([0.95842767]))
(('__label__nl',), array([0.99278772]))
(('__label__tl',), array([0.92873174]))
(('__label__ta',), array([0.99978447]))
(('__label__lt',), array([0.99087018]))
(('__label__ru',), array([0.97754216]))
(('__label__id',), array([0.74613816]))
(('__label__es',), array([0.95412612]))
(('__label__bg',), array([0.95343989]))
(('__label__mk',), array([0.79339004]))
(('__label__vi',), array([0.98882377]))
(('__label__en',), array([0.98221141]))
(('__label__zh',), array([0.99012238]))
(('__label__fi',), array([0.95363593]))
(('__label__lt',), array([0.99567908]))
(('__label__lt',), array([0.90714175]))
(('__label__zh',), array([0.99932379]))
(('__label__mk',), array([0.94542134]))
(('__label__zh',), array([0.99919802]))
(('__label__te',), array([0.999924]))
(('__label__sk',), array([0.77814132]))
(('__label__lt',), array([0.9897117]))
(('

(('__label__uk',), array([0.9979375]))
(('__label__ml',), array([0.99951798]))
(('__label__ru',), array([0.99261826]))
(('__label__id',), array([0.83113271]))
(('__label__bn',), array([0.99669486]))
(('__label__hu',), array([0.98656702]))
(('__label__zh',), array([0.99992394]))
(('__label__hu',), array([0.98892093]))
(('__label__cs',), array([0.98938972]))
(('__label__it',), array([0.94464368]))
(('__label__fa',), array([0.91171658]))
(('__label__en',), array([0.32247701]))
(('__label__ro',), array([0.994555]))
(('__label__cs',), array([0.97272635]))
(('__label__it',), array([0.99656856]))
(('__label__it',), array([0.99613935]))
(('__label__ro',), array([0.99804926]))
(('__label__ht',), array([0.1300178]))
(('__label__es',), array([0.98320562]))
(('__label__es',), array([0.99434358]))
(('__label__sk',), array([0.78237331]))
(('__label__nl',), array([0.98867106]))
(('__label__tr',), array([0.99369371]))
(('__label__es',), array([0.95158035]))
(('__label__el',), array([0.99752301]))
(('_

(('__label__cs',), array([0.96100676]))
(('__label__ar',), array([0.98960775]))
(('__label__fr',), array([0.97552973]))
(('__label__bn',), array([0.99594671]))
(('__label__lv',), array([0.95771903]))
(('__label__en',), array([0.97252351]))
(('__label__mk',), array([0.92775053]))
(('__label__ru',), array([0.99355352]))
(('__label__ja',), array([1.00001717]))
(('__label__tr',), array([0.99451005]))
(('__label__ja',), array([1.00004435]))
(('__label__te',), array([1.00002813]))
(('__label__ro',), array([0.9831447]))
(('__label__lv',), array([0.97267699]))
(('__label__ar',), array([0.98867279]))
(('__label__ar',), array([0.98872787]))
(('__label__zh',), array([0.99999291]))
(('__label__sh',), array([0.68016779]))
(('__label__en',), array([0.92154634]))
(('__label__bn',), array([0.98876762]))
(('__label__da',), array([0.97374314]))
(('__label__mk',), array([0.96124709]))
(('__label__id',), array([0.8190757]))
(('__label__ar',), array([0.99418062]))
(('__label__ta',), array([0.99965185]))
((

(('__label__te',), array([0.99992406]))
(('__label__pl',), array([0.98456562]))
(('__label__nl',), array([0.96778262]))
(('__label__lt',), array([0.99987018]))
(('__label__sq',), array([0.99526304]))
(('__label__fi',), array([0.72321171]))
(('__label__it',), array([0.99172801]))
(('__label__de',), array([0.99702638]))
(('__label__gu',), array([0.99850219]))
(('__label__es',), array([0.97908837]))
(('__label__id',), array([0.73761564]))
(('__label__hr',), array([0.32445171]))
(('__label__ta',), array([0.99989593]))
(('__label__lt',), array([0.90245283]))
(('__label__da',), array([0.87140036]))
(('__label__da',), array([0.73376077]))
(('__label__pl',), array([0.99436593]))
(('__label__da',), array([0.89078963]))
(('__label__sk',), array([0.9293676]))
(('__label__zh',), array([0.85780257]))
(('__label__bn',), array([0.9951629]))
(('__label__nn',), array([0.98513418]))
(('__label__de',), array([0.99531007]))
(('__label__sv',), array([0.99401844]))
(('__label__sk',), array([0.69423079]))
((

(('__label__sr',), array([0.4265123]))
(('__label__ku',), array([0.14474632]))
(('__label__zh',), array([0.999897]))
(('__label__gu',), array([0.99997038]))
(('__label__lt',), array([0.97982979]))
(('__label__hr',), array([0.29230547]))
(('__label__nn',), array([0.87333941]))
(('__label__bn',), array([0.99199641]))
(('__label__sv',), array([0.97336131]))
(('__label__lt',), array([0.99256498]))
(('__label__vi',), array([0.99658471]))
(('__label__sq',), array([0.99810517]))
(('__label__fa',), array([0.98596275]))
(('__label__lt',), array([0.96448219]))
(('__label__hr',), array([0.37383857]))
(('__label__bn',), array([0.9911055]))
(('__label__ml',), array([0.99917483]))
(('__label__ja',), array([1.00004804]))
(('__label__da',), array([0.91855621]))
(('__label__hi',), array([0.98439413]))
(('__label__lv',), array([0.94343352]))
(('__label__ro',), array([0.9916907]))
(('__label__el',), array([0.99745339]))
(('__label__en',), array([0.96785003]))
(('__label__hr',), array([0.31427062]))
(('__

(('__label__bg',), array([0.91117722]))
(('__label__es',), array([0.99044853]))
(('__label__lt',), array([0.95015776]))
(('__label__vi',), array([0.99685597]))
(('__label__bn',), array([0.9956671]))
(('__label__tl',), array([0.87761611]))
(('__label__sv',), array([0.9972344]))
(('__label__hi',), array([0.94829988]))
(('__label__nl',), array([0.99964917]))
(('__label__en',), array([0.96337354]))
(('__label__fi',), array([0.97601175]))
(('__label__de',), array([0.99526107]))
(('__label__lt',), array([0.96527195]))
(('__label__th',), array([0.99943703]))
(('__label__vi',), array([0.99899387]))
(('__label__ku',), array([0.16226718]))
(('__label__sq',), array([0.99709761]))
(('__label__ur',), array([0.95247507]))
(('__label__vi',), array([0.99751282]))
(('__label__tl',), array([0.92976099]))
(('__label__pl',), array([0.99539381]))
(('__label__et',), array([0.84906965]))
(('__label__nn',), array([0.71942955]))
(('__label__it',), array([0.99263424]))
(('__label__fr',), array([0.97158414]))
((

(('__label__et',), array([0.93221492]))
(('__label__tl',), array([0.92546916]))
(('__label__pt',), array([0.98946548]))
(('__label__da',), array([0.5360986]))
(('__label__ja',), array([1.00004053]))
(('__label__sv',), array([0.98574859]))
(('__label__id',), array([0.74093348]))
(('__label__vi',), array([0.22208482]))
(('__label__zh',), array([0.99581379]))
(('__label__ms',), array([0.49738595]))
(('__label__tl',), array([0.11679511]))
(('__label__bn',), array([0.99462312]))
(('__label__ml',), array([0.99925452]))
(('__label__tr',), array([0.9832527]))
(('__label__ro',), array([0.98951662]))
(('__label__zh',), array([0.66241652]))
(('__label__ar',), array([0.99029994]))
(('__label__hu',), array([0.99409342]))
(('__label__hu',), array([0.94703037]))
(('__label__zh',), array([0.99531341]))
(('__label__nl',), array([0.98966962]))
(('__label__en',), array([0.98766905]))
(('__label__tl',), array([0.85700965]))
(('__label__ta',), array([0.99974459]))
(('__label__cs',), array([0.9933601]))
(('

(('__label__hu',), array([0.97019941]))
(('__label__bg',), array([0.78285581]))
(('__label__he',), array([0.9997412]))
(('__label__ur',), array([0.9629674]))
(('__label__pt',), array([0.94686002]))
(('__label__fi',), array([0.9854086]))
(('__label__tl',), array([0.96589297]))
(('__label__fa',), array([0.99559402]))
(('__label__ta',), array([0.99953675]))
(('__label__th',), array([0.99973303]))
(('__label__vi',), array([0.99841869]))
(('__label__de',), array([0.93639714]))
(('__label__zh',), array([1.00001764]))
(('__label__lt',), array([0.97596413]))
(('__label__zh',), array([0.98899257]))
(('__label__ml',), array([0.99909854]))
(('__label__fr',), array([0.95668161]))
(('__label__ro',), array([0.99778342]))
(('__label__hi',), array([0.98076004]))
(('__label__ml',), array([0.99918723]))
(('__label__ru',), array([0.96664172]))
(('__label__es',), array([0.98291409]))
(('__label__th',), array([0.99795973]))
(('__label__fi',), array([0.99341547]))
(('__label__hu',), array([0.98630899]))
(('

(('__label__en',), array([0.98943001]))
(('__label__th',), array([0.99944609]))
(('__label__el',), array([0.99690419]))
(('__label__vi',), array([0.99968839]))
(('__label__zh',), array([0.96862262]))
(('__label__id',), array([0.82277876]))
(('__label__tl',), array([0.17812318]))
(('__label__tl',), array([0.82286984]))
(('__label__sv',), array([0.99942315]))
(('__label__tr',), array([0.99246597]))
(('__label__sv',), array([0.99942338]))
(('__label__ar',), array([0.98224205]))
(('__label__lv',), array([0.77511466]))
(('__label__fr',), array([0.7719025]))
(('__label__hr',), array([0.49723452]))
(('__label__ja',), array([0.99883956]))
(('__label__vi',), array([0.16541894]))
(('__label__te',), array([0.99994099]))
(('__label__zh',), array([0.93377626]))
(('__label__tr',), array([0.98716766]))
(('__label__ml',), array([0.99937743]))
(('__label__en',), array([0.95048445]))
(('__label__mk',), array([0.98125917]))
(('__label__nn',), array([0.83634669]))
(('__label__sq',), array([0.99753386]))
(

(('__label__th',), array([0.99956489]))
(('__label__mk',), array([0.9674567]))
(('__label__el',), array([0.99816549]))
(('__label__hr',), array([0.38962409]))
(('__label__uk',), array([0.99440157]))
(('__label__hr',), array([0.37748557]))
(('__label__nl',), array([0.98070735]))
(('__label__en',), array([0.98069191]))
(('__label__fr',), array([0.90222257]))
(('__label__hu',), array([0.9776395]))
(('__label__fr',), array([0.96806514]))
(('__label__ur',), array([0.83763224]))
(('__label__el',), array([0.99801034]))
(('__label__en',), array([0.87815005]))
(('__label__zh',), array([0.99972939]))
(('__label__zh',), array([0.99308717]))
(('__label__ur',), array([0.66868395]))
(('__label__nl',), array([0.99072295]))
(('__label__ml',), array([0.99868387]))
(('__label__sv',), array([0.96094924]))
(('__label__hu',), array([0.99322879]))
(('__label__ta',), array([0.99995732]))
(('__label__ar',), array([0.94575459]))
(('__label__fa',), array([0.98239052]))
(('__label__cs',), array([0.99366325]))
((

(('__label__da',), array([0.9116689]))
(('__label__no',), array([0.72415519]))
(('__label__en',), array([0.98904133]))
(('__label__da',), array([0.80635321]))
(('__label__hi',), array([0.98118114]))
(('__label__es',), array([0.99108911]))
(('__label__te',), array([0.9999826]))
(('__label__zh',), array([0.89969254]))
(('__label__ml',), array([0.99914628]))
(('__label__en',), array([0.81679642]))
(('__label__bg',), array([0.80627257]))
(('__label__zh',), array([0.99937028]))
(('__label__ta',), array([0.99962842]))
(('__label__it',), array([0.99071103]))
(('__label__ru',), array([0.98353499]))
(('__label__th',), array([0.99983251]))
(('__label__tl',), array([0.84403151]))
(('__label__ru',), array([0.99244916]))
(('__label__lv',), array([0.94625634]))
(('__label__zh',), array([0.99382097]))
(('__label__ru',), array([0.98835224]))
(('__label__te',), array([0.99948573]))
(('__label__zh',), array([0.99943835]))
(('__label__hu',), array([0.99436599]))
(('__label__hu',), array([0.99268252]))
((

(('__label__bg',), array([0.98183221]))
(('__label__tl',), array([0.97601169]))
(('__label__nl',), array([0.9947561]))
(('__label__zh',), array([0.9999302]))
(('__label__it',), array([0.99079818]))
(('__label__ja',), array([1.00004768]))
(('__label__ur',), array([0.84931779]))
(('__label__gu',), array([0.9998486]))
(('__label__fa',), array([0.97715765]))
(('__label__ml',), array([0.99947631]))
(('__label__bg',), array([0.92189813]))
(('__label__sq',), array([0.95465326]))
(('__label__hr',), array([0.4581418]))
(('__label__bg',), array([0.97330904]))
(('__label__uk',), array([0.99157494]))
(('__label__es',), array([0.94092613]))
(('__label__uk',), array([0.99464262]))
(('__label__id',), array([0.81147963]))
(('__label__zh',), array([0.94534588]))
(('__label__sv',), array([0.99473]))
(('__label__it',), array([0.98844975]))
(('__label__zh',), array([0.99455696]))
(('__label__mk',), array([0.90760654]))
(('__label__sv',), array([0.99979115]))
(('__label__en',), array([0.98785555]))
(('__la

(('__label__ml',), array([0.99920523]))
(('__label__hr',), array([0.60144025]))
(('__label__hu',), array([0.99416977]))
(('__label__tr',), array([0.99405575]))
(('__label__el',), array([0.99743217]))
(('__label__hi',), array([0.98299551]))
(('__label__mk',), array([0.68970782]))
(('__label__th',), array([0.99948502]))
(('__label__sv',), array([0.9990229]))
(('__label__tr',), array([0.96692044]))
(('__label__ko',), array([1.00006962]))
(('__label__lv',), array([0.95975661]))
(('__label__zh',), array([0.99724317]))
(('__label__fi',), array([0.98700362]))
(('__label__th',), array([0.99939609]))
(('__label__sv',), array([0.99929881]))
(('__label__sq',), array([0.99547583]))
(('__label__tl',), array([0.91792154]))
(('__label__es',), array([0.93663323]))
(('__label__ko',), array([1.00006866]))
(('__label__ja',), array([1.00004613]))
(('__label__ko',), array([1.00005043]))
(('__label__he',), array([0.99967474]))
(('__label__el',), array([0.99784797]))
(('__label__it',), array([0.96602333]))
(

(('__label__zh',), array([0.98812729]))
(('__label__nn',), array([0.57301295]))
(('__label__tl',), array([0.96775442]))
(('__label__th',), array([0.99965799]))
(('__label__fr',), array([0.98542035]))
(('__label__gu',), array([0.99978131]))
(('__label__ml',), array([0.99947327]))
(('__label__it',), array([0.98229843]))
(('__label__ru',), array([0.9867152]))
(('__label__lt',), array([0.94677615]))
(('__label__hi',), array([0.95389515]))
(('__label__es',), array([0.99194556]))
(('__label__no',), array([0.37172174]))
(('__label__ku',), array([0.23814346]))
(('__label__th',), array([0.99977279]))
(('__label__de',), array([0.9995954]))
(('__label__sv',), array([0.999551]))
(('__label__jv',), array([0.1602339]))
(('__label__el',), array([0.99525166]))
(('__label__el',), array([0.99730444]))
(('__label__hu',), array([0.74765247]))
(('__label__lt',), array([0.96518141]))
(('__label__hr',), array([0.45898274]))
(('__label__mk',), array([0.93973887]))
(('__label__zh',), array([0.95781839]))
(('__

(('__label__tr',), array([0.98474741]))
(('__label__et',), array([0.94320869]))
(('__label__nl',), array([0.99598688]))
(('__label__ar',), array([0.9952718]))
(('__label__el',), array([0.99513072]))
(('__label__he',), array([0.99970829]))
(('__label__en',), array([0.84684795]))
(('__label__hu',), array([0.98834705]))
(('__label__da',), array([0.92707604]))
(('__label__fa',), array([0.91792291]))
(('__label__el',), array([0.99599808]))
(('__label__gu',), array([0.99721736]))
(('__label__cs',), array([0.99081302]))
(('__label__sk',), array([0.92749918]))
(('__label__zh',), array([0.73732579]))
(('__label__zh',), array([0.69389081]))
(('__label__ml',), array([0.99860674]))
(('__label__hi',), array([0.98989409]))
(('__label__fa',), array([0.98926336]))
(('__label__fa',), array([0.99037552]))
(('__label__sq',), array([0.99807602]))
(('__label__en',), array([0.97666264]))
(('__label__uk',), array([0.93854624]))
(('__label__et',), array([0.8665958]))
(('__label__uk',), array([0.99269795]))
((

(('__label__it',), array([0.95274591]))
(('__label__pl',), array([0.99058586]))
(('__label__gu',), array([0.98637551]))
(('__label__ja',), array([1.00004208]))
(('__label__nn',), array([0.59304076]))
(('__label__en',), array([0.70002556]))
(('__label__fi',), array([0.98691791]))
(('__label__ro',), array([0.9385097]))
(('__label__it',), array([0.97953391]))
(('__label__gu',), array([0.99923086]))
(('__label__pl',), array([0.9954204]))
(('__label__hi',), array([0.98924333]))
(('__label__sk',), array([0.94216919]))
(('__label__th',), array([0.99975473]))
(('__label__nl',), array([0.98649961]))
(('__label__ja',), array([1.00001752]))
(('__label__lv',), array([0.91104382]))
(('__label__pl',), array([0.99901313]))
(('__label__hr',), array([0.60010666]))
(('__label__lt',), array([0.97502214]))
(('__label__en',), array([0.95161837]))
(('__label__ml',), array([0.99903184]))
(('__label__sq',), array([0.98456812]))
(('__label__tl',), array([0.7117666]))
(('__label__en',), array([0.73366803]))
(('

(('__label__zh',), array([0.99933821]))
(('__label__hu',), array([0.99200046]))
(('__label__ar',), array([0.99418801]))
(('__label__lt',), array([0.99289453]))
(('__label__ar',), array([0.99405921]))
(('__label__zh',), array([0.98565292]))
(('__label__ro',), array([0.62445271]))
(('__label__zh',), array([1.00000632]))
(('__label__ta',), array([0.99998224]))
(('__label__zh',), array([0.83311111]))
(('__label__fr',), array([0.98932338]))
(('__label__ja',), array([1.00004053]))
(('__label__tr',), array([0.99139714]))
(('__label__th',), array([0.99966067]))
(('__label__hu',), array([0.98897189]))
(('__label__pl',), array([0.99084872]))
(('__label__ml',), array([0.99934411]))
(('__label__cs',), array([0.97900957]))
(('__label__sq',), array([0.99258691]))
(('__label__sq',), array([0.99040747]))
(('__label__ur',), array([0.81663644]))
(('__label__nn',), array([0.51474321]))
(('__label__uk',), array([0.98930031]))
(('__label__fi',), array([0.96481419]))
(('__label__nl',), array([0.83138549]))


(('__label__cs',), array([0.97909927]))
(('__label__sk',), array([0.66569948]))
(('__label__zh',), array([0.99956781]))
(('__label__bg',), array([0.95911491]))
(('__label__pt',), array([0.95583576]))
(('__label__zh',), array([0.99999034]))
(('__label__hu',), array([0.95897496]))
(('__label__sq',), array([0.97965813]))
(('__label__fi',), array([0.99345171]))
(('__label__lt',), array([0.95861441]))
(('__label__tl',), array([0.94205892]))
(('__label__bg',), array([0.95644194]))
(('__label__wuu',), array([0.49230137]))
(('__label__fa',), array([0.98330319]))
(('__label__zh',), array([0.99512821]))
(('__label__fa',), array([0.99175632]))
(('__label__de',), array([0.98828679]))
(('__label__mk',), array([0.88483322]))
(('__label__gu',), array([0.99988765]))
(('__label__en',), array([0.97757894]))
(('__label__da',), array([0.97243339]))
(('__label__he',), array([0.999695]))
(('__label__hu',), array([0.99303561]))
(('__label__id',), array([0.87574983]))
(('__label__sv',), array([0.99873793]))
(

(('__label__ru',), array([0.99200118]))
(('__label__ro',), array([0.99819678]))
(('__label__tl',), array([0.94437748]))
(('__label__bn',), array([0.9856956]))
(('__label__pt',), array([0.97460884]))
(('__label__el',), array([0.99553543]))
(('__label__lv',), array([0.8420521]))
(('__label__bn',), array([0.99244219]))
(('__label__pl',), array([0.99020129]))
(('__label__id',), array([0.64526403]))
(('__label__de',), array([0.99797606]))
(('__label__cs',), array([0.97999424]))
(('__label__id',), array([0.86981612]))
(('__label__lv',), array([0.91238612]))
(('__label__fr',), array([0.94059271]))
(('__label__gu',), array([0.99982458]))
(('__label__lt',), array([0.99307626]))
(('__label__sv',), array([0.99269629]))
(('__label__tr',), array([0.98843575]))
(('__label__ja',), array([1.00003731]))
(('__label__pl',), array([0.99622399]))
(('__label__gu',), array([0.99981457]))
(('__label__lt',), array([0.98918664]))
(('__label__fr',), array([0.98855144]))
(('__label__it',), array([0.9820748]))
(('

(('__label__hr',), array([0.47249192]))
(('__label__ta',), array([0.9998827]))
(('__label__hr',), array([0.44224891]))
(('__label__da',), array([0.85234702]))
(('__label__fa',), array([0.98668498]))
(('__label__th',), array([0.99986631]))
(('__label__bg',), array([0.97443306]))
(('__label__cs',), array([0.99391502]))
(('__label__uk',), array([0.99044573]))
(('__label__sk',), array([0.70751965]))
(('__label__nn',), array([0.95978606]))
(('__label__sk',), array([0.66918683]))
(('__label__nn',), array([0.84275466]))
(('__label__pt',), array([0.9240014]))
(('__label__en',), array([0.99721396]))
(('__label__fa',), array([0.99138355]))
(('__label__ml',), array([0.99943852]))
(('__label__zh',), array([0.99994189]))
(('__label__id',), array([0.81499195]))
(('__label__nl',), array([0.98961049]))
(('__label__zh',), array([0.94853228]))
(('__label__vi',), array([0.99752456]))
(('__label__uk',), array([0.99463856]))
(('__label__ru',), array([0.99107045]))
(('__label__bg',), array([0.9776817]))
(('

(('__label__hi',), array([0.99049252]))
(('__label__mk',), array([0.95319492]))
(('__label__sv',), array([0.98013705]))
(('__label__hu',), array([0.98828274]))
(('__label__de',), array([0.97139579]))
(('__label__he',), array([0.99931169]))
(('__label__nl',), array([0.99104154]))
(('__label__da',), array([0.87931502]))
(('__label__ru',), array([0.99130112]))
(('__label__hi',), array([0.98592621]))
(('__label__pms',), array([0.20483848]))
(('__label__es',), array([0.98004568]))
(('__label__ja',), array([1.00002086]))
(('__label__th',), array([0.99978477]))
(('__label__et',), array([0.8898468]))
(('__label__ur',), array([0.92949909]))
(('__label__fr',), array([0.9906162]))
(('__label__mk',), array([0.82416952]))
(('__label__es',), array([0.9776606]))
(('__label__lv',), array([0.879628]))
(('__label__bn',), array([0.99722803]))
(('__label__tl',), array([0.92007667]))
(('__label__sk',), array([0.87257111]))
(('__label__sq',), array([0.99382019]))
(('__label__id',), array([0.89828891]))
(('_

(('__label__en',), array([0.70457476]))
(('__label__it',), array([0.99258316]))
(('__label__mk',), array([0.97081918]))
(('__label__vi',), array([0.99938488]))
(('__label__zh',), array([0.82596225]))
(('__label__ko',), array([1.00006998]))
(('__label__ar',), array([0.99048692]))
(('__label__de',), array([0.99706155]))
(('__label__tr',), array([0.98675025]))
(('__label__id',), array([0.78816378]))
(('__label__ar',), array([0.98950416]))
(('__label__mk',), array([0.96135968]))
(('__label__zh',), array([0.99962455]))
(('__label__he',), array([0.99961478]))
(('__label__cs',), array([0.93456733]))
(('__label__zh',), array([0.98841012]))
(('__label__sv',), array([0.9957028]))
(('__label__sr',), array([0.3431516]))
(('__label__uk',), array([0.99591154]))
(('__label__zh',), array([0.99960375]))
(('__label__fa',), array([0.99123633]))
(('__label__ta',), array([0.99976885]))
(('__label__hi',), array([0.9606908]))
(('__label__ro',), array([0.99793696]))
(('__label__tr',), array([0.79322237]))
(('

(('__label__fa',), array([0.9603405]))
(('__label__vi',), array([0.99864465]))
(('__label__zh',), array([0.97458357]))
(('__label__id',), array([0.72207314]))
(('__label__ja',), array([1.00004995]))
(('__label__pt',), array([0.93825698]))
(('__label__gu',), array([0.99902898]))
(('__label__pt',), array([0.97920984]))
(('__label__fi',), array([0.97995061]))
(('__label__ta',), array([0.99979335]))
(('__label__uk',), array([0.98781633]))
(('__label__ta',), array([0.99855167]))
(('__label__nl',), array([0.92132151]))
(('__label__th',), array([0.99979281]))
(('__label__zh',), array([0.99916673]))
(('__label__bn',), array([0.99372917]))
(('__label__da',), array([0.72507429]))
(('__label__ur',), array([0.9178524]))
(('__label__cs',), array([0.97531825]))
(('__label__gu',), array([0.99968016]))
(('__label__en',), array([0.32848823]))
(('__label__pt',), array([0.95499218]))
(('__label__uk',), array([0.99549323]))
(('__label__ja',), array([0.76051533]))
(('__label__mk',), array([0.94351143]))
((

(('__label__ru',), array([0.97342056]))
(('__label__zh',), array([0.99772882]))
(('__label__fa',), array([0.99526739]))
(('__label__hr',), array([0.45776823]))
(('__label__ar',), array([0.98282361]))
(('__label__de',), array([0.99766624]))
(('__label__zh',), array([0.99978739]))
(('__label__ro',), array([0.99282199]))
(('__label__fi',), array([0.98891032]))
(('__label__te',), array([0.99991864]))
(('__label__et',), array([0.91301137]))
(('__label__en',), array([0.98544586]))
(('__label__ur',), array([0.82789022]))
(('__label__da',), array([0.80575383]))
(('__label__nl',), array([0.87709451]))
(('__label__hi',), array([0.98625892]))
(('__label__es',), array([0.98799109]))
(('__label__en',), array([0.96808696]))
(('__label__tl',), array([0.24059163]))
(('__label__bg',), array([0.98780221]))
(('__label__hi',), array([0.98716068]))
(('__label__it',), array([0.99077642]))
(('__label__et',), array([0.82225001]))
(('__label__lt',), array([0.94124705]))
(('__label__zh',), array([1.00003505]))


(('__label__sk',), array([0.89567775]))
(('__label__fr',), array([0.98206282]))
(('__label__nn',), array([0.96640462]))
(('__label__hu',), array([0.9989149]))
(('__label__fa',), array([0.97392529]))
(('__label__tl',), array([0.91671097]))
(('__label__sq',), array([0.97544217]))
(('__label__pt',), array([0.94494671]))
(('__label__hr',), array([0.49321064]))
(('__label__pt',), array([0.98366284]))
(('__label__ja',), array([1.00004482]))
(('__label__zh',), array([0.99622709]))
(('__label__pl',), array([0.99562579]))
(('__label__pt',), array([0.95990807]))
(('__label__uk',), array([0.97828454]))
(('__label__en',), array([0.1374902]))
(('__label__id',), array([0.62131482]))
(('__label__hr',), array([0.56547803]))
(('__label__fi',), array([0.98911816]))
(('__label__es',), array([0.97747749]))
(('__label__da',), array([0.6913085]))
(('__label__sk',), array([0.65582108]))
(('__label__zh',), array([0.99835163]))
(('__label__el',), array([0.99792188]))
(('__label__te',), array([0.99993181]))
(('

(('__label__fi',), array([0.9160738]))
(('__label__hi',), array([0.98299551]))
(('__label__tl',), array([0.96735036]))
(('__label__vi',), array([0.9992196]))
(('__label__pt',), array([0.87805986]))
(('__label__ro',), array([0.996068]))
(('__label__pl',), array([0.99213475]))
(('__label__tr',), array([0.87563956]))
(('__label__ja',), array([1.00003242]))
(('__label__sq',), array([0.99387079]))
(('__label__ta',), array([0.99971801]))
(('__label__pl',), array([0.99779642]))
(('__label__lt',), array([0.99687988]))
(('__label__hr',), array([0.33466443]))
(('__label__cs',), array([0.95766151]))
(('__label__ar',), array([0.99218321]))
(('__label__id',), array([0.76643676]))
(('__label__fr',), array([0.97640491]))
(('__label__he',), array([0.99983513]))
(('__label__ru',), array([0.99410456]))
(('__label__en',), array([0.97639942]))
(('__label__zh',), array([0.99996215]))
(('__label__et',), array([0.92048323]))
(('__label__ro',), array([0.99864203]))
(('__label__fi',), array([0.99627298]))
(('_

(('__label__tr',), array([0.9939149]))
(('__label__ta',), array([0.99988312]))
(('__label__ja',), array([1.00004244]))
(('__label__sv',), array([0.99927241]))
(('__label__te',), array([0.99997813]))
(('__label__he',), array([0.99992186]))
(('__label__cs',), array([0.98953712]))
(('__label__ro',), array([0.99457294]))
(('__label__bn',), array([0.99461865]))
(('__label__ku',), array([0.13144056]))
(('__label__en',), array([0.58998561]))
(('__label__hu',), array([0.97405434]))
(('__label__tr',), array([0.99325323]))
(('__label__cs',), array([0.98247468]))
(('__label__sq',), array([0.98112065]))
(('__label__nl',), array([0.99529493]))
(('__label__da',), array([0.83084375]))
(('__label__de',), array([0.99974614]))
(('__label__mk',), array([0.9598006]))
(('__label__te',), array([0.99980474]))
(('__label__uk',), array([0.99317747]))
(('__label__en',), array([0.60020167]))
(('__label__da',), array([0.92699218]))
(('__label__sq',), array([0.99788153]))
(('__label__te',), array([0.99975544]))
((

(('__label__da',), array([0.82047898]))
(('__label__he',), array([0.99970543]))
(('__label__lt',), array([0.96160722]))
(('__label__ro',), array([0.99828243]))
(('__label__ar',), array([0.9860003]))
(('__label__ru',), array([0.9920873]))
(('__label__pl',), array([0.9980402]))
(('__label__vi',), array([0.99719614]))
(('__label__sv',), array([0.99445885]))
(('__label__ru',), array([0.98729169]))
(('__label__ar',), array([0.99195129]))
(('__label__hr',), array([0.34921646]))
(('__label__bn',), array([0.9923687]))
(('__label__sk',), array([0.93287605]))
(('__label__bn',), array([0.998155]))
(('__label__lv',), array([0.93963593]))
(('__label__nn',), array([0.9484995]))
(('__label__ko',), array([1.00006151]))
(('__label__sv',), array([0.99661654]))
(('__label__hi',), array([0.99008429]))
(('__label__mk',), array([0.88678443]))
(('__label__en',), array([0.99204898]))
(('__label__mk',), array([0.92287093]))
(('__label__es',), array([0.95118237]))
(('__label__id',), array([0.77619863]))
(('__la

(('__label__ar',), array([0.99464083]))
(('__label__zh',), array([0.796893]))
(('__label__ja',), array([1.00003564]))
(('__label__bg',), array([0.96233404]))
(('__label__en',), array([0.73167616]))
(('__label__zh',), array([0.99283743]))
(('__label__sk',), array([0.73696727]))
(('__label__it',), array([0.98366654]))
(('__label__ml',), array([0.99915266]))
(('__label__ar',), array([0.98894012]))
(('__label__te',), array([0.99993348]))
(('__label__en',), array([0.38206685]))
(('__label__ml',), array([0.9995026]))
(('__label__bg',), array([0.88425285]))
(('__label__nl',), array([0.99498087]))
(('__label__th',), array([0.99984169]))
(('__label__nn',), array([0.59869552]))
(('__label__ta',), array([0.9998914]))
(('__label__ur',), array([0.93589586]))
(('__label__ar',), array([0.99370569]))
(('__label__mk',), array([0.93745929]))
(('__label__vi',), array([0.10877127]))
(('__label__ar',), array([0.99141181]))
(('__label__gu',), array([0.9998523]))
(('__label__gu',), array([0.99927264]))
(('__

(('__label__ku',), array([0.21737996]))
(('__label__uk',), array([0.99464011]))
(('__label__ta',), array([0.99987692]))
(('__label__ja',), array([0.99973059]))
(('__label__ta',), array([0.99972802]))
(('__label__bn',), array([0.98434216]))
(('__label__he',), array([0.99942636]))
(('__label__bn',), array([0.99743032]))
(('__label__sv',), array([0.98873007]))
(('__label__ru',), array([0.9902733]))
(('__label__ta',), array([0.99999911]))
(('__label__nl',), array([0.88208306]))
(('__label__ko',), array([1.00006962]))
(('__label__zh',), array([0.99996978]))
(('__label__sv',), array([0.99631244]))
(('__label__ru',), array([0.9889608]))
(('__label__da',), array([0.81272191]))
(('__label__hr',), array([0.22210857]))
(('__label__mk',), array([0.94466817]))
(('__label__pt',), array([0.96934134]))
(('__label__ml',), array([0.99926972]))
(('__label__hi',), array([0.95128644]))
(('__label__ro',), array([0.99773669]))
(('__label__hi',), array([0.98746902]))
(('__label__cs',), array([0.95459795]))
((

(('__label__he',), array([0.99937338]))
(('__label__hi',), array([0.99173254]))
(('__label__en',), array([0.97877389]))
(('__label__da',), array([0.94970548]))
(('__label__ru',), array([0.96132803]))
(('__label__tr',), array([0.99711764]))
(('__label__ru',), array([0.9886027]))
(('__label__zh',), array([0.94707483]))
(('__label__tr',), array([0.99581659]))
(('__label__uk',), array([0.9985947]))
(('__label__bn',), array([0.99216688]))
(('__label__it',), array([0.9983508]))
(('__label__pt',), array([0.96598458]))
(('__label__te',), array([0.99990165]))
(('__label__es',), array([0.90447497]))
(('__label__ko',), array([1.00004232]))
(('__label__ml',), array([0.99935269]))
(('__label__lt',), array([0.91249853]))
(('__label__id',), array([0.85724205]))
(('__label__lt',), array([0.91590077]))
(('__label__it',), array([0.99202514]))
(('__label__ur',), array([0.86523473]))
(('__label__tl',), array([0.90004867]))
(('__label__hr',), array([0.43221161]))
(('__label__uk',), array([0.98115587]))
(('

(('__label__vi',), array([0.16977462]))
(('__label__he',), array([0.99975151]))
(('__label__id',), array([0.79781055]))
(('__label__te',), array([0.99990737]))
(('__label__et',), array([0.88283283]))
(('__label__mk',), array([0.93544906]))
(('__label__ml',), array([0.99927735]))
(('__label__zh',), array([0.96952981]))
(('__label__sv',), array([0.99712628]))
(('__label__hu',), array([0.99364358]))
(('__label__mk',), array([0.97173196]))
(('__label__et',), array([0.89612013]))
(('__label__fr',), array([0.92270541]))
(('__label__cs',), array([0.98478943]))
(('__label__nl',), array([0.98519856]))
(('__label__tl',), array([0.94182432]))
(('__label__hu',), array([0.99122167]))
(('__label__lt',), array([0.97083849]))
(('__label__ta',), array([0.99907076]))
(('__label__fa',), array([0.98097485]))
(('__label__gu',), array([0.99935168]))
(('__label__tl',), array([0.91011882]))
(('__label__fa',), array([0.98392713]))
(('__label__te',), array([0.99982572]))
(('__label__ta',), array([0.99984843]))


(('__label__sk',), array([0.71158022]))
(('__label__he',), array([0.99974668]))
(('__label__hu',), array([0.97048348]))
(('__label__fr',), array([0.96519464]))
(('__label__id',), array([0.92474949]))
(('__label__it',), array([0.97897553]))
(('__label__hr',), array([0.38786036]))
(('__label__cs',), array([0.98827767]))
(('__label__hr',), array([0.38386017]))
(('__label__de',), array([0.97510183]))
(('__label__nl',), array([0.90921766]))
(('__label__de',), array([0.99582034]))
(('__label__de',), array([0.30590117]))
(('__label__pl',), array([0.99351662]))
(('__label__ta',), array([0.99986601]))
(('__label__fi',), array([0.98552191]))
(('__label__ml',), array([0.9993673]))
(('__label__ja',), array([1.00004554]))
(('__label__de',), array([0.99584788]))
(('__label__zh',), array([0.99999785]))
(('__label__gu',), array([0.99964345]))
(('__label__th',), array([0.99914151]))
(('__label__ko',), array([1.00006843]))
(('__label__cs',), array([0.91276038]))
(('__label__nn',), array([0.84155893]))
(

(('__label__he',), array([0.99972075]))
(('__label__nl',), array([0.99591786]))
(('__label__cs',), array([0.99318546]))
(('__label__bg',), array([0.90422368]))
(('__label__et',), array([0.93647361]))
(('__label__tl',), array([0.94758821]))
(('__label__no',), array([0.60424733]))
(('__label__th',), array([0.99990064]))
(('__label__da',), array([0.64930677]))
(('__label__et',), array([0.94383669]))
(('__label__tr',), array([0.99028528]))
(('__label__sk',), array([0.73518252]))
(('__label__nn',), array([0.85032403]))
(('__label__nn',), array([0.84663063]))
(('__label__te',), array([0.99988711]))
(('__label__id',), array([0.81779718]))
(('__label__sq',), array([0.7689904]))
(('__label__te',), array([0.9999997]))
(('__label__uk',), array([0.99596786]))
(('__label__th',), array([0.999403]))
(('__label__hi',), array([0.99153662]))
(('__label__fa',), array([0.98960686]))
(('__label__en',), array([0.98918003]))
(('__label__bg',), array([0.96981883]))
(('__label__zh',), array([0.99239057]))
(('_

(('__label__ko',), array([1.00006962]))
(('__label__ro',), array([0.99512994]))
(('__label__bg',), array([0.96407604]))
(('__label__el',), array([0.99701595]))
(('__label__fa',), array([0.98510218]))
(('__label__bn',), array([0.99483114]))
(('__label__zh',), array([0.99925685]))
(('__label__zh',), array([0.99624628]))
(('__label__lt',), array([0.97733229]))
(('__label__th',), array([0.99957275]))
(('__label__id',), array([0.7033115]))
(('__label__vi',), array([0.99892718]))
(('__label__bg',), array([0.94385171]))
(('__label__sq',), array([0.99548006]))
(('__label__da',), array([0.69400305]))
(('__label__nn',), array([0.84506238]))
(('__label__da',), array([0.89615589]))
(('__label__th',), array([0.99958128]))
(('__label__hi',), array([0.97845966]))
(('__label__th',), array([0.99949789]))
(('__label__hi',), array([0.98152]))
(('__label__hr',), array([0.61295909]))
(('__label__fi',), array([0.97583765]))
(('__label__vi',), array([0.95701253]))
(('__label__nl',), array([0.99815893]))
(('_

(('__label__tl',), array([0.91984338]))
(('__label__bn',), array([0.99655622]))
(('__label__id',), array([0.87252527]))
(('__label__id',), array([0.91334867]))
(('__label__fa',), array([0.99204981]))
(('__label__he',), array([0.99981886]))
(('__label__pl',), array([0.97383642]))
(('__label__zh',), array([0.99921358]))
(('__label__ja',), array([1.00004125]))
(('__label__bg',), array([0.90894198]))
(('__label__et',), array([0.61555523]))
(('__label__ja',), array([1.00003707]))
(('__label__da',), array([0.84241915]))
(('__label__pt',), array([0.96970659]))
(('__label__ru',), array([0.98012561]))
(('__label__cs',), array([0.90269542]))
(('__label__ta',), array([0.99970651]))
(('__label__it',), array([0.97532737]))
(('__label__pms',), array([0.1213733]))
(('__label__en',), array([0.24161464]))
(('__label__ro',), array([0.99359804]))
(('__label__it',), array([0.77003032]))
(('__label__cs',), array([0.96546453]))
(('__label__de',), array([0.99653745]))
(('__label__nl',), array([0.99941945]))


(('__label__id',), array([0.92381966]))
(('__label__zh',), array([0.99160177]))
(('__label__hu',), array([0.98181266]))
(('__label__bg',), array([0.94224328]))
(('__label__zh',), array([0.98185599]))
(('__label__ml',), array([0.99805987]))
(('__label__hi',), array([0.98779124]))
(('__label__th',), array([0.99934608]))
(('__label__uk',), array([0.99791914]))
(('__label__fr',), array([0.98949391]))
(('__label__id',), array([0.47761911]))
(('__label__zh',), array([0.99471676]))
(('__label__vi',), array([0.99843627]))
(('__label__ja',), array([1.00004673]))
(('__label__en',), array([0.8238492]))
(('__label__he',), array([0.9999153]))
(('__label__ko',), array([1.00006986]))
(('__label__et',), array([0.90816849]))
(('__label__ru',), array([0.98714519]))
(('__label__pt',), array([0.96373957]))
(('__label__sk',), array([0.80532306]))
(('__label__es',), array([0.95031106]))
(('__label__tl',), array([0.96619433]))
(('__label__ht',), array([0.18954287]))
(('__label__bn',), array([0.99893397]))
((

(('__label__de',), array([0.96867299]))
(('__label__pt',), array([0.96369588]))
(('__label__ru',), array([0.98492575]))
(('__label__th',), array([0.99906045]))
(('__label__el',), array([0.99747854]))
(('__label__zh',), array([0.98878211]))
(('__label__fr',), array([0.90492815]))
(('__label__tl',), array([0.97383624]))
(('__label__tl',), array([0.13461059]))
(('__label__he',), array([0.99990433]))
(('__label__gu',), array([0.99977809]))
(('__label__lt',), array([0.99780113]))
(('__label__pl',), array([0.99638212]))
(('__label__mk',), array([0.93353224]))
(('__label__de',), array([0.9968136]))
(('__label__lt',), array([0.98666358]))
(('__label__te',), array([0.99980944]))
(('__label__zh',), array([0.99357271]))
(('__label__nl',), array([0.98459274]))
(('__label__ur',), array([0.72801983]))
(('__label__cs',), array([0.96678764]))
(('__label__de',), array([0.99592352]))
(('__label__mk',), array([0.96938014]))
(('__label__nn',), array([0.98733407]))
(('__label__zh',), array([0.99888515]))
(

In [184]:
x_train

Unnamed: 0,Par
0,Klement Gottwaldi surnukeha palsameeriti ning ...
1,"Sebes, Joseph; Pereira Thomas (1961) (på eng)...."
4,ถนนเจริญกรุง (อักษรโรมัน: Thanon Charoen Krung...
7,He was a economics graduate from Elphinstone C...
13,விசாகப்பட்டினம் தமிழ்ச்சங்கத்தை இந்துப் பத்திர...
14,Bùi Tiến Dũng (sinh năm 1959 tại huyện Ứng Hòa...
19,UNC有得一只历史悠久个'诚信守则'。渠是由学堂个诚信法庭（Honor Court）来执行个...
26,De spons behoort tot het geslacht Haliclona en...
29,エノが行きがかりでバスに乗ってしまい、気分が悪くなった際に助けるが、今すぐバスを降りたいと運...
38,Tsutinalar (İngilizce: Tsuut'ina): Kanada'da A...


In [204]:
ft_names = []
for i in range(len(ft_predict)):
    if len(ft_predict[i]) == 3:
        ft_names.append('nan')
    else:
        lang_name = languages.get(alpha_2=ft_predict[i]).name
        if '(' in lang_name:
            ft_names.append(re.sub("[(\[].*?[\)]", "", lang_name)[:-1])
        else:
            ft_names.append(lang_name)

In [205]:
acc = accuracy_score(y_tr_new_1k, ft_names)
print("Accuracy: ", acc)

Accuracy:  0.915125


0         Estonian
1          Swedish
2             Thai
3         Gujarati
4            Tamil
5       Vietnamese
6          Chinese
7            Dutch
8         Japanese
9          Turkish
10            Urdu
11        Japanese
12      Indonesian
13         Chinese
14          Danish
15      Portuguese
16          French
17         Chinese
18          Korean
19       Bulgarian
20            Thai
21        Estonian
22      Portuguese
23      Portuguese
24           Hindi
25           Tamil
26         Spanish
27       Bulgarian
28          French
29          French
           ...    
9970        Hebrew
9971       Chinese
9972       Tagalog
9973       Persian
9974         Czech
9975       Turkish
9976    Portuguese
9977    Lithuanian
9978       Finnish
9979     Hungarian
9980        Danish
9981       Swedish
9982       English
9983       Persian
9984          Thai
9985        Arabic
9986          Urdu
9987       Spanish
9988      Estonian
9989          Urdu
9990       Finnish
9991        

In [224]:
import sys
import gzip
import json

## FastText 
#!pip install fasttext
import fasttext
# download this pretrained model 
model = fasttext.load_model('fast_text_model/lid.176.ftz')

#!pip install pycountry
from pycountry import languages


def get_lang(text):
    '''
    This function uses the pretrained model from FastText 
    to detect the language of a document. 
    It returns a tuple in the form of (('__label__en',), array([0.26207453]))
    '''
    predicted_lang = model.predict(text)
    # only keep the language code such as 'en' from the output of the prediction
    ft_predict = predicted_lang[0][0][9:]
    
    # We need to convert the ISO 639 codes to find full name of the language for each symbol.
    # From the 46 common languages we aimed to detect, they all have two letter codes. 
    if len(ft_predict) != 2:
        ft_names = 'nan'
        
    else:
        # Convert to full name in English 
        lang_name = languages.get(alpha_2=ft_predict).name
        # We only keep the full name of the language
        if '(' in lang_name:
            ft_names= re.sub("[(\[].*?[\)]", "", lang_name)[:-1]
        else:
            ft_names = lang_name
    return ft_names

if __name__ == '__main__':
    
    #Json_path = sys.argv[1]
    Json_path = 'Data/Jobs/linkedin/FF64062CA67CA0E0C4D1720A676C6637_.gz'
    
    with gzip.GzipFile(Json_path,"r") as json_file:
        job = json.load(json_file)
        text = job['Body']
        job['Language'] = get_lang(text) 

    json_str = json.dumps(job, indent = 4) + "\n"   

    with open(Json_path+'new_lang', 'w') as outfile:
        outfile.write(json_str)

In [211]:
import sys
import gzip
import json


def get_lang(text):

    return ''

if __name__ == '__main__':
    
    Json_path = sys.argv[1]
    with open(Json_path,"r") as json_file:
        job = json.load(json_file)
        text = job['Body']
        job['Language'] = get_lang(text) 

    json_str = json.dumps(job, indent = 4) + "\n"   

    with open(Json_path+'new_lang', 'w') as outfile:
        outfile.write(json_str)

<_io.TextIOWrapper name='Data/Jobs/linkedin/0A0BBE99D9A73C88E82671CAB37247FD_.gz' mode='r' encoding='UTF-8'>

NameError: name 'sds' is not defined