In [42]:
import codecs
import collections
import os
import pickle

import numpy as np


class Vocab:
    """Two-way mapping between tokens and integer indices.

    New tokens receive the next free index (the current vocabulary size), so
    a vocabulary built only through `feed` is numbered 0, 1, 2, ... in
    first-seen order and ``token(feed(t)) == t`` holds for every fed token.
    """

    def __init__(self, token2index=None, index2token=None):
        """Create a vocabulary, optionally seeded with existing mappings.

        Args:
            token2index: dict mapping token -> index, or None/empty.
            index2token: list whose i-th entry is the token with index i,
                or None/empty.

        If only one of the two mappings is supplied, the other one is derived
        from it so that lookups in both directions agree.
        """
        token2index = token2index or {}
        index2token = index2token or []

        if token2index and not index2token:
            # Rebuild the reverse lookup by ordering tokens by their index;
            # assumes the indices form the dense range 0..n-1, which is what
            # feed() produces.
            index2token = sorted(token2index, key=token2index.get)
        elif index2token and not token2index:
            token2index = {tok: idx for idx, tok in enumerate(index2token)}

        self._token2index = token2index
        self._index2token = index2token

    def feed(self, token):
        """Return the index of `token`, registering it first if it is new."""
        if token not in self._token2index:
            # allocate new index for this token
            index = len(self._token2index)
            self._token2index[token] = index
            self._index2token.append(token)

        return self._token2index[token]

    @property
    def size(self):
        """Number of distinct tokens currently in the vocabulary."""
        return len(self._token2index)

    def token(self, index):
        """Return the token stored at `index` (list indexing semantics)."""
        return self._index2token[index]

    def __getitem__(self, token):
        """Return the index of `token`; raise KeyError if it is unknown."""
        index = self.get(token)
        if index is None:
            raise KeyError(token)
        return index

    def get(self, token, default=None):
        """Return the index of `token`, or `default` if it is unknown."""
        return self._token2index.get(token, default)

    def save(self, filename):
        """Pickle both mappings to `filename` as a (token2index, index2token) tuple."""
        with open(filename, 'wb') as f:
            pickle.dump((self._token2index, self._index2token), f, pickle.HIGHEST_PROTOCOL)

    @classmethod
    def load(cls, filename):
        """Rebuild a vocabulary from a file written by `save`.

        NOTE(security): unpickling can execute arbitrary code; only load
        files that come from a trusted source.
        """
        with open(filename, 'rb') as f:
            token2index, index2token = pickle.load(f)

        return cls(token2index, index2token)


def load_data(data_dir, max_word_length, eos='+',
              tokens_path='/home/apatra/Desktop/work/tokens.txt'):
    """Build word and character vocabularies from the train/valid/test splits.

    Reads ``train.txt``, ``valid.txt`` and ``test.txt`` (UTF-8) from
    `data_dir`, prints summary statistics, echoes every word-vocabulary entry
    as ``<index> <token>`` and writes the word vocabulary to `tokens_path`,
    one token per line in index order.

    Args:
        data_dir: directory containing the three split files.
        max_word_length: upper bound on token length; words longer than
            ``max_word_length - 2`` characters are cut to that length.
        eos: string removed from every line before tokenising; pass a falsy
            value to skip this step.
        tokens_path: file that receives the vocabulary dump. The default is
            the location that was previously hard-coded in this function.

    Returns:
        None. Tensor construction is currently disabled (see the comment at
        the end of the function).

    Raises:
        AssertionError: if a token longer than `max_word_length` was stored.
    """
    char_vocab = Vocab()
    char_vocab.feed(' ')  # blank is at index 0 in char vocab

    word_vocab = Vocab()

    actual_max_word_length = 0

    word_tokens = collections.defaultdict(list)
    char_tokens = collections.defaultdict(list)

    for fname in ('train', 'valid', 'test'):
        # Single-string prints behave the same under Python 2 and Python 3.
        print('reading %s' % fname)
        with codecs.open(os.path.join(data_dir, fname + '.txt'), 'r', 'utf-8') as f:
            for line in f:
                line = line.strip()
                # Strip '{', '}' and '|' from the raw text so that the '|'
                # inserted on the next line only ever stands for <unk>.
                line = line.replace('}', '').replace('{', '').replace('|', '')
                line = line.replace('<unk>', ' | ')
                if eos:
                    # The marker is removed; it is not appended as a token.
                    line = line.replace(eos, '')

                for word in line.split():
                    if len(word) > max_word_length - 2:  # space for 'start' and 'end' chars
                        word = word[:max_word_length-2]

                    word_tokens[fname].append(word_vocab.feed(word))

                    char_array = [char_vocab.feed(c) for c in word]
                    char_tokens[fname].append(char_array)

                    actual_max_word_length = max(actual_max_word_length, len(char_array))

    assert actual_max_word_length <= max_word_length

    print('')
    print('actual longest token length is: %d' % actual_max_word_length)
    print('size of word vocabulary: %d' % word_vocab.size)
    print('size of char vocabulary: %d' % char_vocab.size)
    print('number of tokens in train: %d' % len(word_tokens['train']))
    print('number of tokens in valid: %d' % len(word_tokens['valid']))
    print('number of tokens in test: %d' % len(word_tokens['test']))

    # Dump the word vocabulary; the context manager guarantees the file is
    # flushed and closed even if writing fails part-way through.
    with codecs.open(tokens_path, 'w', 'utf-8') as out:
        for i in range(word_vocab.size):
            token = word_vocab.token(i)
            print('%d %s' % (i, token))
            out.write(token)
            out.write('\n')

    # NOTE: tensor construction is disabled; the function returns None.
    # Original code, kept for reference:
    #
    # word_tensors = {}
    # char_tensors = {}
    # for fname in ('train', 'valid', 'test'):
    #     assert len(char_tokens[fname]) == len(word_tokens[fname])
    #
    #     word_tensors[fname] = np.array(word_tokens[fname], dtype=np.int32)
    #     char_tensors[fname] = np.zeros([len(char_tokens[fname]), actual_max_word_length], dtype=np.int32)
    #
    #     for i, char_array in enumerate(char_tokens[fname]):
    #         char_tensors[fname][i, :len(char_array)] = char_array
    #
    # return word_vocab, char_vocab, word_tensors, char_tensors, actual_max_word_length

In [43]:
# Run the loader on train.txt / valid.txt / test.txt in the data directory.
# 65 is max_word_length, so words longer than 63 characters are truncated.
load_data('/home/apatra/Desktop/work/data',65)

('reading', 'train')
('reading', 'valid')
('reading', 'test')
()
('actual longest token length is:', 63)
('size of word vocabulary:', 24226)
('size of char vocabulary:', 98)
('number of tokens in train:', 67152)
('number of tokens in valid:', 4256)
('number of tokens in test:', 4678)
0 Mtul’katlku
1 Wjit
2 Koqwajo’taqann
3 Mimajulnu’k
4 Wejkuaqml’tlj
5 Na
6 Ke’sk
7 penoqlte’taqan
8 aqq
9 esatite’tmaqan
10 wjit
11 koqwajo’taqann
12 wejku’iaqmi’titlmimajulnu’k
13 wetapeksi’kl
14 elue’wuti’l
15 ta’n
16 msit
17 wen
18 poqwajite’tikl
19 wejkwite’tmkwsitqamu
20 tett
21 alsutk
22 teluej
23 teli-ktlamsltasij
24 alsutkpmawsin
25 keskmna’q
26 we’kwata’sik
27 kisna
28 kewisinu
29 ,
30 na
31 wla
32 kisi
33 tlapukuemk
34 msitmimajulnu’k
35 tliktantunew
36 mawi
37 espe’
38 .
39 nuta’q
40 tijiw
41 ji’nm
42 mu
43 ketu’
44 kisa’lawt
45 eliapsin
46 mitakespiagewe
47 matneke
48 matnmn
49 kestawite’taqan
50 emekwo’tasim
51 koqwajo’taqannwejku’laqmi’tl
52 mimajulnu’k
53 miamuj
54 etli-ankwo’tasikl
55 tplut

1113 e'plowa'lukuti'k
1114 waltestaqnji'jk
1115 tenu
1116 Waltestaqnk
1117 metue'kik
1118 eliu
1119 i'we'wmi'tipn
1120 tia'mue'l
1121 waqntal
1122 qalipue'l
1123 pasi
1124 metue'nukip
1125 mesnmik
1126 ketantaqitipni'
1127 wowjiktuk
1128 i'wjitu'tipn
1129 nekmowe'l
1130 klu'ktipn
1131 weli
1132 piatek
1133 ewe'wasikl
1134 wenjitia'
1135 mue'l
1136 waqnta
1137 mesnmin
1138 wejaqma'tu'nl
1139 jiklkmiaq
1140 wi'n
1141 mime
1142 Ela'kipuljik
1143 wasoqite'jik
1144 pasei'ti
1145 la'qittaqn
1146 kloqwej
1147 ewi'
1148 kit
1149 waltestaqniktu
1150 Asukom
1151 te'sijik
1152 eli'ji
1153 Kitmaqnk
1154 metuei'k
1155 Kmu'ji'japi
1156 sipikk
1157 laska'tum
1158 kitmaqnk
1159 wejiujik
1160 kmu'ji'jap
1161 ne'sijik
1162 kisukuw'
1163 skwa
1164 Naniska'ql
1165 kmu'ji'j
1166 Kisikuo'p
1167 etuwiw
1168 walnoqsasit
1169 kisikui'skwaq
1170 anapsasiji
1171 Kitmaqnn
1172 teluisit
1173 kisikuwi'skwaq
1174 telui'tujik
1175 tqamuewe'
1176 pqoji
1177 waltesta'titesnu
1178 papuaqn
1179 wsuwa'toq
1180 ki'kajeyu
1

2305 wl
2306 nemitumk
2307 kesnukwat
2308 nepk
2309 kinuwa'taqn
2310 kesi
2311 mimajuinu'l
2312 pneltit
2313 mimajuinu
2314 pnet
2315 newtukwa'lukwe
2316 Pejita'jik
2317 elakut
2318 elta'titl
2319 apiksiktatultiji
2320 pekijipnet
2321 me'kaqi
2322 kisa'tekek
2323 na'twenl
2324 eskamalatl
2325 kisit'sultilik
2326 alasutma'tiji
2327 Ktikik
2328 wskwijnu
2329 wikm
2330 pejo'tu'tij
2331 sulieweya'la'tij
2332 pnej
2333 ne'wtapaqmia
2334 ekitmi'titl
2335 aniapsimkewe'
2336 Summink
2337 nespikima'tiji
2338 el
2339 ketlamsitas
2340 pa'qapukuwe
2341 lo'q
2342 meske
2343 nemi'n
2344 elisin
2345 npo'qnek
2346 Mal
2347 alasutmaqnn
2348 nenqi
2349 kitmi'tikl
2350 wskwjinu
2351 ewilamiej
2352 waju'taq
2353 lami
2354 kespisawe'k
2355 wkwatl
2356 tekikl
2357 wqusi'k
2358 piluamukweta'li
2359 Wpukwikl
2360 poqj
2361 saqpiku'niaql
2362 aptitek
2363 Pem
2364 awisiw
2365 Pikwelken
2366 tluetew
2367 welinpawanek
2368 newtitpa'q
2369 mettewe'k
2370 mettetesk
2371 npek
2372 wije'tultita
2373 kiwalsultiji
237

3326 'npisite
3327 mimajuinualsiap
3328 unaqa'lite
3329 Glaman
3330 getlams'tuit
3331 'msntew
3332 gsalaji
3333 newgtunatl
3334 nplin
3335 ugtlue'utiwal
3336 getlams'tuatl
3337 mn'tua'gig
3338 liegw
3339 awna
3340 guguntew
3341 wa'so'
3342 wej
3343 mimajuinuigtug
3344 ugs'tawian
3345 la'lan
3346 ilsuman
3347 ugtapula
3348 'Ms't
3349 ilsutasig
3350 getlams'tuigw
3351 ilsutasit
3352 gesatmi'tij
3353 pata'taqatiji
3354 ilsumujig
3355 pata'tegej
3356 masgelt'g
3357 wasetegigtug
3358 puatmug
3359 musga's'gt
3360 gigwaji
3361 tela'tege
3362 puatmlij
3363 jigs'tuat
3364 musga'tutew
3365 jigs'tuatl
3366 wesgutg
3367 te's'g
3368 nemitoqop
3369 nutg'
3370 jigs'tuagu
3371 gtlams'tasitl
3372 ugglusuaqanml
3373 teliaqewe'
3374 Ewjilitl
3375 petgimapn
3376 wesgutgl
3377 ignmuatl
3378 gugunata
3379 gsalatl
3380 iga'tuaj
3381 ugpitn
3382 msntew
3383 elistuatl
3384 msnmug
3385 ilsumatal
3386 ugtapulatal
3387 iapji
3388 nutma'tijig
3389 pugwelgig
3390 el'ta'liji
3391 Sa'neg
3392 sign'tugsinew
3393 majul

4565 gelulateg
4566 majulgwalit
4567 pmiegw
4568 ugmimajuaqanmg
4569 'gjitutew
4570 telima'tit
4571 ginualsin
4572 egsue
4573 ugtapsuninu
4574 ginualsia
4575 teluei
4576 wejia'p
4577 geituoq
4578 ilsutaqatioq
4579 nemituo
4580 ilsuma
4581 ilsutaqa
4582 tetapu
4583 ilsutega'
4584 Ntilsutaqanm
4585 tetapu'atew
4586 newgtugwa'lugo
4587 wijitgweiwi
4588 Ewi'gas'g
4589 'gt'tplutaqanmua
4590 tapusijig
4591 nemi'tij
4592 ilsuteget
4593 ingutiw
4594 teluetij
4595 geitu'g
4596 Etlewistu
4597 etlewistoq
4598 nenuiwoq
4599 nenuawoq
4600 Nuj
4601 nenuaqoq
4602 teluetl
4603 gina'muej
4604 tepege'tij
4605 ugsua'lagul
4606 “Jigla'sites
4607 gwiluitoqs'p
4608 'Nputititoqs'p
4609 apigsigtmugsiwo
4610 pipanimtultiji
4611 ne'pa'si
4612 tleiawi
4613 tleiaultioq
4614 tlimuloq
4615 'npu'titoqs'p
4616 apigsigtmugsiwoq
4617 'gtlams'tasultioq
4618 'gtlams'tasultiwoq
4619 'nputititoqs'p
4620 'gtlue'utiwaq
4621 apigsigtuam'gi
4622 mujga
4623 ilsutmultoqs'p
4624 pugwelg
4625 goqweigtug
4626 pugwe'l
4627 gina'mult

5575 ns'tasulti'g
5576 elue
5577 ni'negugjit
5578 'npila
5579 tliaqmuta
5580 'gtlams'tuig
5581 teluepnaq
5582 gepmite'lm'gwi'tital
5583 westaugsi'gwe
5584 Wesgumapnn
5585 petgimatl
5586 gepmitelmu
5587 iganpugua'tiji
5588 wesgutmi'ti'gw
5589 “Tuege'tital
5590 getlams'tg
5591 gesatmi'tip
5592 ul'te'lmugsinew
5593 ilsumgutaq
5594 nemi'
5595 ignmaqig
5596 Wejgu'eiap
5597 nutmuij
5598 menuaqalig
5599 wesua'tuig
5600 ilsumata
5601 wesgutma
5602 'ntangita'suaqanm't'pn
5603 telimip
5604 'pgisitugsitoqs'p
5605 tlimuline
5606 Geji'g
5607 [Etna
5608 gisteju
5609 Nasoqtesgmegewe'g
5610 'mg'sng
5611 nasgua'tiji
5612 'mjega'tutal
5613 uggwatua
5614 ugtejgewe't'p
5615 Siawa'timgewe
5616 'nqatmn
5617 ujje
5618 gesalajig
5619 enmi
5620 uloqotalultiji
5621 nu'gwalatl
5622 Isgaliot
5623 'ntuisgetun
5624 ignmat'p
5625 assutm
5626 Geitoq
5627 apaja'sitew
5628 unaqa'sit
5629 p'ta'utigtu
5630 Mena'latl
5631 uggotmual
5632 gigtoqopilg
5633 gasigo'suti
5634 ugtassuig
5635 ta's'g
5636 guta'toq
5637 ugsispasuti

6815 elugwatgl
6816 ugsisguaq
6817 ewi'gas'gtnugul
6818 wi'gatignigtu
6819 ewi'gas'gl
6820 'gtlams'tmultoqs'p
6821 gtlams'tmoq
6822 'msntoqs'p
6823 nemi'jig
6824 Galali
6825 (Taipilias
6826 Sepeti
6827 ultesgatultieg'p
6828 'nsis
6829 Ji'me
6830 telimugsie
6831 usge
6832 Telimg't
6833 “Wije'ultesne
6834 El'ta'ieg
6835 tepita'ieg
6836 ugtuluo
6837 ne'pa'tue
6838 Wejgwapnia
6839 Nemi'g'tt
6840 jajigtug
6841 nenuaqatt
6842 Sesgwalugsie
6843 “Nitaptu
6844 usg'tio
6845 asitemg'
6846 “'Lpa
6847 “Iga'lug
6848 'gta'pimuoq
6849 'gtluaq
6850 'msnatoqs'
6851 tela'taqatieg'p
6852 tepo'laqatt'p
6853 'nta'piminen
6854 'ntulnaq
6855 wesam
6856 telimg
6857 “Gjisaqamawminu
6858 Gjisaqamawili
6859 ugtatla'im
6860 (pitu'
6861 pija'latl
6862 ugpita'qawe'l
6863 paqasi
6864 unaqiet
6865 na'taqama'
6866 esgwieieg
6867 siawqatmu'tieg
6868 'ntulna
6869 na'taqama'l'g't
6870 waju'et
6871 'nta'pimine
6872 amasenugup
6873 sitmug
6874 leppi
6875 na'taqamita'ie
6876 nemitueg
6877 nugsaqatal
6878 pugtewigl
6879 nme'j

7877 mime’juey
7878 elkitasik
7879 ntuiskasikt
7880 Nikana’toqsip
7881 Laurence
7882 Murra
7883 tley
7884 tke’snuk
7885 teli-assoqma’tasik
7886 samqwan-iktuk
7887 malsano’kuo’
7888 Ke’kwe’k
7889 etl-klo’tasiksip
7890 tapsunn
7891 a’tla’wekne
7892 Ilta’tasiksipnek
7893 malsano’kuo’mek
7894 koqoeyek
7895 weskwia’q
7896 elkitasiksipnek
7897 Cheticam
7898 Kepjoqa’tasikek
7899 Robin-ewey
7900 malsano’kuom
7901 Keptin
7902 Thomas
7903 pekwatelkisnek
7904 wte’piteml
7905 nutsaqike
7906 Pekije’kek
7907 Robin
7908 pejiwsitek
7909 pejiwsultisni’k
7910 nujilukutitki’k
7911 nutankua’titki’
7912 We’kaw
7913 Clement
7914 Huber
7915 Jean
7916 J
7917 Janvri
7918 wiaqpultisni’k
7919 ala’tu’tisni’kl
7920 malsano’kuo’mk
7921 Pemi-ajelkisnik
7922 ewi’ta’tiji
7923 nuku’
7924 eymu’tiksipnik
7925 pekisulapnik
7926 pile’k
7927 wejita’jik
7928 Jerse
7929 Aklasiewki’k
7930 Wenujki’k
7931 Platisnaqki’
7932 Wenujk
7933 klujjiewto’simk
7934 alasutmaqney
7935 peji-wsimkutisni’k
7936 L’uipu’lk
7937 kespu’tmumkek
793

9074 ewi’ki
9075 kelutk
9076 tlawtukwelital
9077 ela’tulij
9078 Mulapukue
9079 Me’
9080 kis-wi’kikl
9081 apu’kwettal
9082 weli-apankituatl
9083 smaqa’tulijl
9084 wli-anko’ta
9085 etlatali’ti
9086 wesku’tmi’tij
9087 nuta’tew
9088 Jipuktuk
9089 wetkitmne
9090 tepi-pkije’nuk
9091 kiskajeyinew
9092 tapatatk
9093 Guernse
9094 etuk
9095 kelumanew
9096 ika’ti
9097 Aji-wetqane’k
9098 Islands
9099 aji-nqasi-mena’taqumk
9100 puksi
9101 keltaqami’
9102 Ewi’kmi’tij
9103 Kaqatali’ti
9104 keta’muatl
9105 kelutmlij
9106 kisi-lukewknew
9107 Mackinnone’ji’jk
9108 l’pa’tujk
9109 wapamukwa’tunew
9110 wenji’kuom
9111 lakkla’ns
9112 wkwe’ji’juaq
9113 apoqntmnew
9114 kesistaqnasikl
9115 pla’kitl
9116 eljaqasikl
9117 msaqtaqt
9118 Ika’q
9119 waqama’tekem
9120 nutmat
9121 ika’taqnewinu’k
9122 tle’k
9123 Sutik
9124 atankuala’tijik
9125 jijklue’wji’jk
9126 Plaster
9127 Cove
9128 pem-kaqiaq
9129 ekntie’wut
9130 Kelu’sultilijik
9131 to’
9132 telimsi
9133 wi’kikewatal
9134 wel-nenuatl
9135 ika’taqnewinu’l
9136 pkw

10181 weskweyaq
10182 kulkwi
10183 skik
10184 pekwatuek
10185 wultiek
10186 pni
10187 almantiewaqi
10188 koqma
10189 I
10190 wa
10191 kamikl
10192 masiap
10193 aklasie
10194 sin
10195 eliewap
10196 tewijeyanek
10197 Telo
10198 tmas
10199 metui
10200 mtue
10201 ekina
10202 masimk
10203 simk
10204 nui
10205 Piltuo
10206 tmap
10207 Ke
10208 masianek
10209 nsis
10210 nesutmalsewip
10211 matney
10212 mua
10213 wenjui
10214 kitnmay
10215 si
10216 Menuaqalipnik
10217 masin
10218 elukwenuk
10219 ankaptmn
10220 matnewey
10221 nue
10222 titl
10223 wutanmuaq
10224 mujik
10225 puktaqi
10226 sultinew
10227 Welkaqnik
10228 maqeyuksi
10229 penoqo
10230 tasi
10231 Wejkwikeyanek
10232 tasimk
10233 kesaluksik
10234 Metue
10235 kataq
10236 msisku
10237 Elisultijik
10238 tupsi
10239 saqlia
10240 Niskamijaq
10241 tlimipnaq
10242 skmal
10243 tinew
10244 tukia
10245 titaq
10246 nata
10247 kate
10248 nenmi
10249 kelu
10250 kemk
10251 Ekinua
10252 tuip
10253 tuap
10254 wejkwa
10255 taqnik
10256 wniksamijl
1025

11324 eli-pkijitk
11325 sipu’
11326 ju’snn
11327 munsa’matk
11328 qasqi-kjikmMi’kmaq
11329 kepmite’tmi’titlAnkumkamkewe’l
11330 apune’kwi’tiji
11331 wunaqapemu
11332 jijaqmijk
11333 Sa’n-Patist
11334 Kopo’qWnaqa’tunen
11335 npitnokominal
11336 staqe
11337 kitpu
11338 wnisqi’Wenaqintu’tiek
11339 sisipaqKepmleketaiek
11340 kjitmiwWjit
11341 wantaqo’ti
11342 witaptimkeweyUla
11343 maqmikek
11344 Mi’kma’kiAq
11345 telua’tiek
11346 “Kwe’”
11347 “Pjila’si”
11348 Sa’n
11349 Patis
11350 Tenio’
11351 Saqmawaq
11352 Alasutmelsewanej
11353 meskita’simkewey
11354 aknutmaq
11355 No’kmatut
11356 aknutmaqniwtuk
11357 kinua’tulek
11358 saqmaminua
11359 Kisu’lkw
11360 wksua’lata
11361 mekwaye’k
11362 ntininena
11363 no’kmatu
11364 ksaqmaminuaq
11365 kaqia’q
11366 wmimajuaqnemek
11367 te’suknitaq
11368 tepknuse
11369 asukuom
11370 atjietek
11371 eksitpu’ke
11372 Sankew
11373 kaqietaq
11374 msenkek
11375 kjialasutmaqnek
11376 Westawu’lkw
11377 naqtemulkusna
11378 jentawi
11379 npu’tinenu
11380 Tewije’kaq

12539 Klutjewto’tek
12540 nepsa’lu
12541 wtinin
12542 ewksami
12543 ksinukwaqsi
12544 Wmaltem
12545 telijuik
12546 istaqe’
12547 jipu’ji’
12548 temik
12549 wa’qijel
12550 paqalaptmik
12551 nisia
12552 wisqnek
12553 wettimu’sipne
12554 tepne’kleweyek
12555 ketaqamo’tlulkisne
12556 Kinu
12557 elue’wulti’k
12558 pewjalqu
12559 winiplnu
12560 ktanu
12561 eji
12562 ne’po’
12563 Ntaqo’qoniktuk
12564 ekulami
12565 netna
12566 kmu’jiktuk
12567 msita
12568 pepso’teke
12569 Klutjiewta’sit
12570 weskaqelmane
12571 Kji’niskamewiktuk
12572 i’pajiemtoqalanej
12573 TopSAQAMAW
12574 NUJEYWIT
12575 nujeywit
12576 pmawsiteskisa’lit
12577 msaqanisma’sinela’lit
12578 mijipjewaqaniktuktepow
12579 wantaqpaqte’jkNjijaqamijl
12580 apjila’lajlaq
12581 elkoqatawti’kmuitawti’l
12582 eltekl
12583 sape’wutiktukwjit
12584 teluisitIa’j
12585 mpuaqanipniktuk
12586 i’alimtimaiwine’j
12587 jipatmunulsukuin
12588 nipisoqonmaq
12589 ktaptu’n
12590 mlkitaiPatalutim
12591 wskaluaqta’nik
12592 ketanijikmima’tuin
12593 nunji

13573 Sikniktewa
13574 Kespekewa
13575 Kepten
13576 (district
13577 chief)
13578 ut-Mikemaimua
13579 tapunemikel
13580 ut-lukuwaqenmua
13581 Alsusit
13582 keptinewuti
13583 pematoq
13584 ekji-mawitplotaqetimk
13585 Mawiomi
13586 (Johnson
13587 Nan
13588 kaskimtlnaqiponqeke
13589 Apaqtukewaq
13590 oqoltititek
13591 jajiktoq
13592 teltasijik
13593 oqoltijik
13594 elmimtaqami
13595 Mikmaq
13596 weltasualatiji
13597 teluwituoltijik
13598 Lnu
13599 Skijinu
13600 Wejatekemkek
13601 weketutij
13602 Micma
13603 jijuwaqa
13604 estekej
13605 Mic
13606 Wenujuwikto
13607 Micmaqu
13608 klusuwaqen
13609 Mikkmaq
13610 nikma
13611 teluwaq
13612 my
13613 kin-friends
13614 (Whitehead
13615 wikasi
13616 Mikma
13617 Smith/Francis
13618 wikaqen
13619 elmiutaqamuk
13620 unemaki
13621 wikikaqenn
13622 (Mikmaq
13623 ewasik
13624 Mikmaw
13625 newtejit
13626 wikikaqeniktuk
13627 weketasiktew
13628 wikaqenn
13629 piluwi
13630 (wikasiktew
13631 italics)
13632 ektekel
13633 wekasiktal
13634 saqewel
13635 akenutema

14652 Kisiku
14653 ellen
14654 robinso
14655 ukumuljiniskaq
14656 tewje
14657 mikwitetk
14658 lnuiamalkamkel
14659 weskowasik
14660 Robinson
14661 etlkisikwet
14662 (Bear
14663 River
14664 epketesenuk
14665 Elmiwktaqamu
14666 Kekunkel
14667 napuwikaqnn
14668 wijew
14669 tlek
14670 ut-lukowaqenmuow
14671 nasisawemkel
14672 amalekenujel
14673 naskemitisn
14674 ut-kiniskwejuwel
14675 akusennmual
14676 bolero-ewikit
14677 weskitqwanjij
14678 pitaqsek
14679 isey
14680 makkot
14681 kenetek
14682 Kisikuk
14683 mikwitetemitijel
14684 tekenel
14685 amalkatemitikuwel
14686 kinametinewey
14687 telikisitasik
14688 sikentasit
14689 (th)
14690 kaskiwel
14691 tapuiskaq
14692 kaskimtlnaqenipunkekek
14693 akenutematemitisen
14694 c)Amalkewaqnn
14695 Alsutemaqnnewe
14696 pematasik
14697 eddore
14698 maliewimk
14699 utanel
14700 Amalkamk
14701 kepijoqatoqel
14702 Jeddore
14703 weskitjaqikn
14704 kulkunawey
14705 (icing
14706 on
14707 cake)
14708 (Personal
14709 Kejituek
14710 apajipunkek
14711 kaskimtlna

15756 wiaqatoq
15757 ut-ektlamsetaqeneymuo
15758 welitetemitikw
15759 telitasuwaqe
15760 Mikemaw
15761 nakusetewiamalkam
15762 Piley
15763 kejit
15764 telimultes
15765 aul
15766 nkwisk
15767 newtejuwamuk
15768 kwenki
15769 Nekaw
15770 ujuwal
15771 amalalitel
15772 skijinuwultitite
15773 telimataq
15774 unjanwa
15775 utniskamijuwal
15776 nakusetewi
15777 laman
15778 ankateme
15779 ninenewey
15780 ninenewe
15781 atasikte
15782 Nekemewey
15783 teluwitesk
15784 kilewe
15785 amkwesewaj
15786 nisaskenasiktew
15787 apajat
15788 Nkwisk
15789 nujijk
15790 tapuewajewultitaq
15791 sistewa
15792 nukutenuk
15793 pipanimkewey
15794 teluitaq
15795 nekemowkewey
15796 (William
15797 Pemiaq
15798 newkel
15799 iknemuwamek
15800 nakusete
15801 usitkam
15802 taqteke
15803 Ikalsit
15804 amalkan
15805 apoqenmuan
15806 kesinukutijik
15807 etawaqtemat
15808 ukjiksun
15809 jiloltine
15810 Nakuset
15811 Ewiamalkamk
15812 alsutmaqene
15813 kaqemutemitij
15814 melkikenuti
15815 kinamaqen
15816 tetpaqateken
15817 p

16823 lakkla’ns-iktuk
16824 kispate
16825 Betty
16826 Stockley
16827 “Etuk
16828 welta’suatmi’tisnl
16829 amskwesewe’l
16830 mulinn
16831 metue’kip
16832 ela’kittekem
16833 temasqita’sikl
16834 kmu’jl
16835 tmi’kn-iktuk
16836 naskoqta’sikl
16837 sasqateta
16838 elatejitumkl
16839 walqumikan
16840 mulin
16841 poqji-wekasimke
16842 etekipnl
16843 wuta
16844 Saqpi’tij
16845 tekle’jit
16846 oqatnuke’l
16847 Brook
16848 Mulqate’kemk
16849 mulqate’kemit
16850 etl-mulqa’timki’kl
16851 eteksipni’kl
16852 Mawelkisnl
16853 etl-mulqatmumk
16854 Gypsu
16855 Gypsum
16856 ntlu’tewamu’k
16857 nukwiaq
16858 kun’te
16859 wiskipow
16860 Etl-mulqemk
16861 Gypsumey
16862 Whitty
16863 Shore
16864 Beach
16865 Crossing
16866 Roa
16867 Poqjitasiksip
16868 Etquljuiku’
16869 Asp
16870 pasteskiksip
16871 atlika’simk
16872 tel-pase’k
16873 na’taqma’tun
16874 wskijikuekewey
16875 telitum
16876 Puljaynewawti
16877 ika’tasksip
16878 puljaynl
16879 pqutayjitaq
16880 mulqutipnik
16881 Canada
16882 Cement
16883 Mulian


17905 ewle’juaqnike
17906 apoqnmua’tisnik
17907 entu’tis
17908 lukwaqnmue
17909 iloqamuj
17910 sqna’qwe’k
17911 tel-kaqi’sk
17912 tewliketa’ji
17913 Sipukewaq
17914 tel-nenujik
17915 teli-melki-ikatmi’tij
17916 tel-mawikwam
17917 tel-lukwemk
17918 etlankuamkl
17919 wetnu’kwatkis
17920 kisi-ntui’sketun
17921 (we’kaw
17922 MinMetalsaq
17923 China
17924 ntui’skmas)
17925 elmi-kespiaq
17926 wela’sinuke
17927 Elmi’knik
17928 ila’tumk
17929 “Tar
17930 Ponds”
17931 tl-we’wten
17932 kaqamikip
17933 kjijitu’n
17934 Related
17935 Materials
17936 Sectio
17937 Aji
17938 nqamasa’lukwi’tij
17939 eltaqnawatmi’ti
17940 pituimtlnaqn
17941 pemi-punqeke
17942 metua’lukwi’tis
17943 pekija’lukwi’tis
17944 pitnituk
17945 nalkwe’mi’tis
17946 jijklue’wjuapi
17947 eltaqnawemkewe’
17948 sa’qati’k
17949 l’taqte’kne’
17950 nalkwe’kemk
17951 kisi-eltaqnawatmumk
17952 waieknapi
17953 elisknuemk
17954 nalke’wmum
17955 piptoqipska’tumk
17956 (rolagan)
17957 kisi-eltaqnawatte
17958 Pitne’l
17959 nalkwe’knl
17960 wejit

19012 Pekiji-mkumie’ja’tekeyap
19013 eliwsiwanek
19014 Kejikiya
19015 eliwsiap
19016 Nuji-pipanikesi
19017 Tale’ksip
19018 poqji-lukwenek
19019 te’sipunqekip
19020 alsutmuane
19021 tekteskuksi’si
19022 Ne’w
19023 Tekteskuksiap
19024 npisewiknk
19025 kewteskuksia
19026 me’teskuksi
19027 nunje
19028 nekmaq
19029 telialsikl
19030 tekteskawe
19031 me’teskuij
19032 mnteskisoq
19033 nunjie
19034 Ala’tuap
19035 tmi’knatp
19036 elapeka’taqip
19037 wunje
19038 ne’pa’yin
19039 ne’po’ltis
19040 Moqwe
19041 ntlu’tewamuksitaq
19042 te’sipowa
19043 Mikwite’lmt
19044 E’
19045 mikwite’lim
19046 wela’matultiekik
19047 weji-ksite’kipna
19048 ne’pe’kipna
19049 Dunca
19050 pekwateluapna
19051 Pekiji-kleywata
19052 wantaqapukuitaq
19053 mkumie’ja’lu
19054 Ika’pnaq
19055 Tracye
19056 Wesimuktuksiekipna
19057 Ala’likipni’k
19058 winqamiksultitki’
19059 nasa’taqapnik
19060 kelpilasi
19061 apkwa’lsi
19062 kewa’liki
19063 Me’si-wsimkwa
19064 tekweywi
19065 Nsitue’k
19066 Georg
19067 tkweywiso
19068 Malcolm
1906

20091 tel-we’wmumkl
20092 tel-kina’muksin
20093 ketanuj
20094 nme’j
20095 tia’muk
20096 Wije’wm
20097 Tali-pilua’sikl
20098 wejkwikwe
20099 Barr
20100 kisteltesk
20101 nikoql
20102 E-Ba
20103 Pukweli-wktapia’tijik
20104 ewe’mi’tijk
20105 ketana’tij
20106 plamu’
20107 Wejkikweyanek
20108 We’koqma’
20109 i’we’wmekipnl
20110 na’puktaqnn
20111 plamue’ka’tie
20112 wenaqeken
20113 kelnmn
20114 na’puktaqn
20115 eleken
20116 na’pu’ktaqn
20117 kwetapia’tiek-ewe’wmek
20118 kinikwek
20119 kisitu’n
20120 sipaqate’t
20121 plam
20122 kisi-sipaqate’j
20123 plamu
20124 naqtmn
20125 siaw-nastektn
20126 npite
20127 tla’taqtieki
20128 mkikn
20129 kmukk
20130 Pem-pilua’sik
20131 pilua’tekemk
20132 ekina’masin
20133 kaqi-tepiepnik
20134 awije’jijik
20135 siawitu’tij
20136 telo’timki
20137 aji-kate’kemk
20138 wi’kipaltim
20139 telatalum
20140 ksnukwaqnn
20141 mesnmi’tij
20142 pitu’po’
20143 juji’j
20144 nasa’sit
20145 mentank
20146 kisi-wtapiek
20147 ktanteke
20148 te’si’k
20149 nikwenuksi’k
20150 tlikwenat

21308 Lassui’pi’skwaq
21309 maliaptmi’tisnl
21310 netui’ske’tij
21311 wujjuaq
21312 alankua’ti’tij
21313 nujo’kisnl
21314 al-waqama’tekem
21315 nuta’jik
21316 al-waqama’tekete’wk
21317 pisko’lusnik
21318 Kantakwe’skwaq
21319 koqqwaje’ksip
21320 tl-piskweta’tij
21321 Kantakwejk
21322 maw-pukwelkipnik
21323 pukweli-ankua’tu’tij
21324 poqji-pejita’snik
21325 tkweywa’tijik
21326 wujjua
21327 siaw-tla’siksip
21328 pejita’te’wk
21329 mikwite’lmkwi’tikik
21330 nikanita’pnik
21331 keska’tusnek
21332 ajipjutaqnmue
21333 awije’jk
21334 tetapua’ltimk
21335 lukwaqn-iktuk
21336 Mil-lukutisni
21337 l’lutaqnatkw
21338 elkwi’tmi’tisnl
21339 lukwaqnewo’kuom
21340 muline
21341 piluo’tasimk
21342 kaqa’tu’tis
21343 Wejkwi-asoqma’timk
21344 welteskua’tijik
21345 epite’sk
21346 telita’sultiliji
21347 siwqatk
21348 we’kwata’sit
21349 wejkwi-teppit
21350 miso’qo
21351 welteskuaj
21352 elma’lij
21353 kisi-wlte’skuaj
21354 malie’wi’tijl
21355 kisikui’sk
21356 siku’sk
21357 pukwelipunqekl
21358 waqama’teket
2135

22321 na’talitpiet
22322 wpukwikmaja’sik
22323 amalsiktmat
22324 mettewe’k
22325 alame’s
22326 l’nuikilja’tijik
22327 l’nuwintu’tiji
22328 ntuwi’skasito
22329 A’newiktuk
22330 Eskisoqni
22331 tesukna’
22332 ma’penoqo’tasik
22333 assitmuk
22334 awtinu
22335 tluektn
22336 tllukwen
22337 tli-ksika’tutal
22338 jiptekiknmuiek
22339 wekwayuinamejikelp
22340 Mesnmɨk
22341 Knua’taqan
22342 wlmajo'ti'
22343 kitnmeyakwi'titl
22344 teli-klu'lkl
22345 istue'kl
22346 wtl'qamiksutiwal
22347 tel-pma'tu'ti'titl
22348 telo'ltl'tij
22349 tel-nuta'q
22350 pejiwsultite'wk
22351 teli-kjijiujik
22352 wtapeksultinew
22353 nike'ke'sk
22354 eimu'ti'kw
22355 nemitu'kw
22356 toqwa'tuekl
22357 npitnnal
22358 kekinua'tekeyek
22359 weji-tqwa'tuekl
22360 tel-puatmek
22361 knu'kwaqnminal
22362 nmitasin
22363 nenasin
22364 Kotokiskahkiyil
22365 olu
22366 wakac
22367 kiluwaptomuhtit
22368 wiwonaskahsuwiki
22369 Pomi
22370 wahkehsuwiw
22371 kette
22372 amsqahs
22373 leyu
22374 wahkehsihtit
22375 mecinhoticik
22376 kehso

23571 kiseyasikw
23572 (sɨtke’
23573 ankweywatl
23574 kisio’pla’lutp
23575 telopla’teke
23576 ilsumutɨ
23577 keytu’n
23578 ta’nwen
23579 ‘opla’lsɨkɨ
23580 ‘knua’taqan
23581 kisie’wasɨktɨtew
23582 ilsumuj
23583 e’wasɨktɨtew
23584 mawita’tij
23585 NUK
23586 ilsutmi’tij
23587 apiksiktuksin
23588 apatkimuksin
23589 kisio’pla’tekep
23590 wikin
23591 teliltaqte’mulk
23592 tllimam’k
23593 pictured
23594 frame
23595 presented
23596 kisa’toq
23597 tel-knekk
23598 pekisuluksi’
23599 wejinmitasitewit
23600 be
23601 seen
23602 Camero
23603 wesua'tuek
23604 Unaited
23605 tel-tepkisa'toq
23606 telwl'te'tmek
23607 ajipjulkijjik
23608 Sko'siaewaq
23609 --
23610 kina'masultinew
23611 kjijitunew
23612 teli-milpntijik
23613 apoqnmuksinen
23614 ‘nsanoqona’tas’ktn
23615 ulo’tinenu
23616 wenwi’tmn
23617 weitaiek’p
23618 mawimtue’k
23619 tewjisa’q
23620 eimu’tiek
23621 Kpno’lewuti
23622 iktikek
23623 kitmnew
23624 kis-wi'km
23625 kis­pmiaq
23626 (-
23627 kjijitu'tij
23628 aji-wlpiatew
23629 telo'ltim
23630 e