# Part 1 - Building a Language Model

In [1]:
import nltk
from nltk.collocations import *
from datetime import datetime
from collections import Counter
import os
import psutil

In [2]:
from sklearn.model_selection import train_test_split
import numpy as np

In [5]:
pid = os.getpid()
extraction_start = datetime.now()

file = open('Corpus\korpusmalti1.txt')
corpus = file.read()

tokens = nltk.word_tokenize(corpus)
RemoveSymbols(corpus)

py = psutil.Process(pid)
memoryUse = py.memory_info()[0]/2.**30
extraction_end = datetime.now()

extraction_time = dict()
extraction_time['extraction_time'] = extraction_end - extraction_start
print('Extraction Time(HH::MM:SS:ms) - {}\n\n'.format(extraction_time['extraction_time']))
print('Memory Use: ', memoryUse, 'GB')

Extraction Time(HH::MM:SS:ms) - 0:00:01.694533


Memory Use:  0.11978530883789062 GB


In [3]:
def RemoveSymbols(corpus):
    arr = []
    symbols = "“”‘’!\"#$€%&()*'+-,./:;<=>?@[\]^_`{|}~\n"
    
    for i in corpus:
        if i not in symbols:
            arr.append(i)
            
    return arr

In [6]:
bigrams = nltk.bigrams(tokens)
freqdist = nltk.FreqDist(bigrams)
for i,j in freqdist.items():
    print(i,j)

('<', 'text') 11
('text', 'id=') 11
('id=', "''") 1404
("''", 'academic1') 1
('academic1', "''") 1
("''", '>') 1404
('>', '<') 1730
('<', 'p') 316
('p', 'id=') 316
("''", '0') 22
('0', "''") 22
('<', 's') 1077
('s', 'id=') 1077
('>', 'L-') 117
('L-', 'DEF') 122
('DEF', 'il-') 3077
('il-', 'null') 3086
('null', 'gÄ§an') 35
('gÄ§an', 'NOUN') 35
('NOUN', 'gÄ§an') 35
('gÄ§an', 'null') 35
('null', 'prinÄ‹ipali') 9
('prinÄ‹ipali', 'ADJ') 10
('ADJ', 'prinÄ‹ipali') 10
('prinÄ‹ipali', 'null') 10
('null', "ta'") 703
("ta'", 'GEN') 836
('GEN', "ta'") 843
("ta'", 'null') 1935
('null', 'Conectando') 5
('Conectando', 'NOUN-PROP') 3
('NOUN-PROP', 'Conectando') 3
('Conectando', 'null') 5
('null', 'Mundos') 5
('Mundos', 'NOUN-PROP') 4
('NOUN-PROP', 'Mundos') 4
('Mundos', 'null') 5
('null', '(') 127
('(', 'X-PUN') 149
('X-PUN', '(') 149
('(', 'null') 149
('null', 'Malta') 13
('Malta', 'NOUN-PROP') 13
('NOUN-PROP', 'Malta') 13
('Malta', 'null') 13
('null', ')') 143
(')', 'X-PUN') 149
('X-PUN', ')') 149
(

('ambjentali', 'null') 28
('null', 'soÄ‹jali') 47
('soÄ‹jali', 'ADJ') 48
('ADJ', 'soÄ‹jali') 48
('soÄ‹jali', 'null') 48
('null', 'politiÄ‹i') 8
('politiÄ‹i', 'ADJ') 7
('ADJ', 'politiku') 8
('politiku', 'null') 10
('s-t-gÄ§', 'jwasslu') 1
('jwasslu', 'VERB') 2
('VERB', 'wassal') 26
('wassal', 'w-s-l') 26
('w-s-l', 'gÄ§all-') 3
('null', 'faqar') 14
('faqar', 'NOUN') 12
('NOUN', 'faqar') 12
('faqar', 'f-q-r') 14
('f-q-r', ',') 6
('null', 'inÄ¡ustizzja') 3
('inÄ¡ustizzja', 'NOUN') 3
('NOUN', 'inÄ¡ustizzja') 10
('inÄ¡ustizzja', 'null') 10
('null', 'kunflitt') 3
('kunflitt', 'NOUN') 4
('NOUN', 'kunflitt') 5
('kunflitt', 'null') 5
("''", '7') 22
('7', "''") 22
('>', 'Minbarra') 9
('Minbarra', 'PREP') 9
('PREP', 'Minbarra') 9
('Minbarra', 'null') 9
('null', 'dan') 93
('dan', 'PRON-DEM') 105
('null', 'is-') 71
('null', "''") 211
("''", 'X-PUN') 262
('X-PUN', "''") 262
("''", 'null') 262
('null', 'kien') 47
('kien', 'KIEN') 53
('null', 'wieÄ§ed') 24
('wieÄ§ed', 'NUM-WHD') 24
('NUM-WHD', 'wieÄ§ed

('taÄ§riÄ¡', 'NOUN') 9
('NOUN', 'Ä§ruÄ¡') 9
('Ä§ruÄ¡', 'Ä§-r-Ä¡') 12
('Ä§-r-Ä¡', 'introduttorju') 1
('introduttorju', 'ADJ') 1
('ADJ', 'introduttorju') 1
('introduttorju', 'null') 1
('null', 'informazzjoni') 13
('informazzjoni', 'NOUN') 18
('NOUN', 'informazzjoni') 18
('informazzjoni', 'null') 18
('null', 'materjal') 7
('materjal', 'NOUN') 9
('NOUN', 'materjal') 12
('materjal', 'null') 16
('null', 'promozzjonali') 1
('promozzjonali', 'ADJ') 1
('ADJ', 'promozzjonali') 1
('promozzjonali', 'null') 1
('null', 'r-') 30
('r-', 'DEF') 40
('null', 'gÄ§at-') 14
('gÄ§at-', 'PREP-DEF') 19
('gÄ§-l-m', 'li') 2
('null', 'pprovda') 1
('pprovda', 'VERB') 1
('VERB', 'pprovda') 1
('pprovda', 'null') 1
('null', 'gÄ§enuhom') 1
('gÄ§enuhom', 'VERB') 1
('null', 'jagÄ§mlu') 18
('jagÄ§mlu', 'VERB') 21
('gÄ§-m-l', 'l-') 11
('null', 'Ä§in') 15
('Ä§in', 'NOUN') 15
('NOUN', 'Ä§an') 15
('Ä§an', 'Ä§-j-n') 15
('Ä§-j-n', 'tagÄ§hom') 2
('null', 'extra') 1
('extra', 'ADJ') 1
('ADJ', 'extra') 1
('extra', 'null') 1
('nul

('jieqfu', 'VERB') 1
('VERB', 'waqaf') 5
('waqaf', 'w-q-f') 5
('w-q-f', ',') 3
('null', 'jaÄ§sbu') 4
('jaÄ§sbu', 'VERB') 4
('VERB', 'Ä§aseb') 11
('Ä§aseb', 'Ä§-s-b') 13
('Ä§-s-b', ',') 2
('null', 'jieÄ§du') 12
('jieÄ§du', 'VERB') 18
("'-Ä§-d", 'deÄ‹izjoni') 1
('deÄ‹izjoni', 'NOUN') 1
('NOUN', 'deÄ‹izjoni') 1
('deÄ‹izjoni', 'null') 1
('null', 'jaqsmu') 9
('jaqsmu', 'VERB') 9
('q-s-m', 'l-') 6
('null', 'opinjonijiet') 3
('opinjonijiet', 'NOUN') 3
('NOUN', 'opinjoni') 3
('opinjoni', 'null') 3
('null', 'tagÄ§hom') 81
('null', 'oÄ§rajn') 15
('oÄ§rajn', 'NOUN') 10
('NOUN', 'ieÄ§or') 11
('ieÄ§or', 'null') 31
('null', 'favur') 6
('favur', 'PREP') 6
('PREP', 'favur') 6
('favur', 'null') 6
('null', 'aÄ§jar') 14
('aÄ§jar', 'ADJ') 14
('ADJ', 'aÄ§jar') 14
('aÄ§jar', 'null') 16
("''", '26') 15
('26', "''") 15
('>', 'Mir-') 1
('Mir-', 'NOUN-PROP') 1
('NOUN-PROP', 'Mir-') 1
('Mir-', 'null') 1
('null', 'raÄ§al') 2
('raÄ§al', 'NOUN') 2
('NOUN', 'raÄ§al') 6
('raÄ§al', 'null') 6
('null', 'Å¼gÄ§ir') 3
('Å¼

('IÅ¼vilupp', 'null') 5
('null', 'Bniedem') 3
('Bniedem', 'NOUN') 3
('q-j-s', 'kemm') 2
('null', 'jieÄ§u') 4
('jieÄ§u', 'VERB') 4
("'-Ä§-d", 'l-') 5
('null', 'post') 11
('post', 'NOUN') 13
('NOUN', 'post') 19
('post', 'null') 19
('null', 'proÄ‹essi') 5
('proÄ‹essi', 'NOUN') 6
('null', 'Å¼vilupp') 11
('null', 'bosta') 10
('bosta', 'QUAN') 13
('QUAN', 'bosta') 13
('bosta', 'null') 13
('null', 'nazzjonijiet') 1
('nazzjonijiet', 'NOUN') 1
('NOUN', 'nazzjon') 1
('nazzjon', 'null') 1
('>', 'Ir-') 6
('Ir-', 'DEF') 6
('null', 'rapport') 3
('rapport', 'NOUN') 3
('NOUN', 'rapport') 3
('rapport', 'null') 3
("Ä¡-j-'", 'aÄ¡Ä¡ornat') 1
('aÄ¡Ä¡ornat', 'VERB') 1
('VERB', 'aÄ¡Ä¡ornat') 1
('aÄ¡Ä¡ornat', 'null') 2
('null', 'jinkludi') 1
('jinkludi', 'VERB') 1
('null', 'kwantitattiva') 1
('kwantitattiva', 'ADJ') 1
('ADJ', 'kwantitattiv') 1
('kwantitattiv', 'null') 1
('null', 'kwalitattiva') 1
('kwalitattiva', 'ADJ') 1
('ADJ', 'kwalitattiv') 1
('kwalitattiv', 'null') 1
('null', 'jinÄ¡abru') 3
('jinÄ¡abru',

('null', 'Tunnellata') 1
('Tunnellata', 'PART-PASS') 1
('PART-PASS', 'Tunnellata') 1
('Tunnellata', 'null') 1
('null', 'Ekwivalenti') 1
('Ekwivalenti', 'ADJ') 1
('ADJ', 'Ekwivalenti') 1
('Ekwivalenti', 'null') 1
('null', 'Pitrolju') 1
('Pitrolju', 'NOUN-PROP') 1
('NOUN-PROP', 'Pitrolju') 1
('Pitrolju', 'null') 1
('null', 'TEP') 2
('TEP', 'X-ABV') 2
('X-ABV', 'TEP') 2
('TEP', 'null') 2
('null', 'Taljan') 8
('Taljan', 'ADJ') 8
('ADJ', 'Taljan') 8
('Taljan', 'null') 8
('null', '3') 15
('3', 'X-DIG') 19
('X-DIG', '3') 19
('3', 'null') 19
('null', 'NiÄ¡erjan') 1
('NiÄ¡erjan', 'NOUN-PROP') 1
('NOUN-PROP', 'NiÄ¡erjan') 1
('NiÄ¡erjan', 'null') 1
('null', '0.7') 1
('0.7', 'X-DIG') 1
('X-DIG', '0.7') 1
('0.7', 'null') 1
('null', '!') 11
('!', 'X-PUN') 11
('X-PUN', '!') 11
('!', 'null') 11
("''", '31') 13
('31', "''") 13
('null', 'gÄ§andhom') 47
('gÄ§andhom', 'VERB-PSEU') 49
('null', 'inqas') 16
('inqas', 'ADJ') 11
('ADJ', 'inqas') 11
('inqas', 'n-q-s') 17
('n-q-s', 'minn') 4
('null', '5') 6
('5'

('null', 'ecological') 1
('ecological', 'ADJ') 1
('ADJ', 'ecological') 1
('ecological', 'null') 1
('null', 'footprint') 1
('footprint', 'ADV') 1
('ADV', 'footprint') 1
('footprint', 'null') 1
('null', 'statistika') 1
('statistika', 'ADJ') 1
('ADJ', 'statistika') 1
('statistika', 'null') 2
('null', 'tiÅ¼en') 2
('tiÅ¼en', 'VERB') 2
('VERB', 'wiÅ¼en') 3
('wiÅ¼en', 'w-Å¼-n') 3
('w-Å¼-n', 'l-') 1
('null', 'nikkunsmaw') 1
('nikkunsmaw', 'VERB') 1
('VERB', 'ikkonsma') 1
('ikkonsma', 'null') 1
("''", '43') 10
('43', "''") 10
('null', 'gÄ§ax') 33
('gÄ§ax', 'CONJ-SUB') 36
('CONJ-SUB', 'gÄ§ax') 36
('gÄ§ax', 'null') 36
('null', 'jinvolvi') 6
('jinvolvi', 'VERB') 6
('VERB', 'involva') 12
('involva', 'null') 12
('n-q-s', 'tar-') 1
('s-t-gÄ§', 'jiÄ¡u') 2
("Ä¡-j-'", 'kkwantifikati') 1
('kkwantifikati', 'PART-PASS') 1
('PART-PASS', 'ikkwantifikat') 1
('ikkwantifikat', 'null') 1
('null', 'metri') 1
('metri', 'NOUN') 1
('NOUN', 'metru') 3
('metru', 'null') 3
('null', 'kwadrati') 1
('kwadrati', 'PART-PASS

('rridux', 'VERB') 2
('r-j-d', 'ninsew') 1
('ninsew', 'VERB') 1
('VERB', 'nesa') 2
('nesa', 'n-s-j') 2
('n-s-j', 'lanqas') 1
('lanqas', 'FOC') 5
('FOC', 'lanqas') 5
('lanqas', 'null') 5
('null', 'jiÅ¼en') 1
('jiÅ¼en', 'VERB') 1
('w-Å¼-n', '20') 1
('20', 'X-DIG') 7
('X-DIG', '20') 7
('20', 'null') 7
('null', 'kilo') 3
('kilo', 'NOUN') 3
('NOUN', 'kilo') 3
('kilo', 'null') 3
('Ä§-t-Ä¡', '1,500') 1
('1,500', 'X-DIG') 1
('X-DIG', '1,500') 1
('1,500', 'null') 1
('null', 'materja') 6
('materja', 'NOUN') 6
('NOUN', 'materja') 6
('materja', 'null') 6
('null', 'prima') 6
('prima', 'NOUN') 5
('NOUN', 'prim') 5
('prim', 'null') 7
("Ä¡-j-'", 'mmanufatturat') 1
('mmanufatturat', 'PART-PASS') 1
('PART-PASS', 'immanufatturat') 2
('immanufatturat', 'null') 2
("''", '58') 8
('58', "''") 8
('>', 'Idea') 2
('Idea', 'NOUN') 2
('NOUN', 'Idea') 2
('Idea', 'null') 2
('null', 'torbot') 2
('torbot', 'VERB') 2
('VERB', 'rabat') 8
('rabat', 'r-b-t') 8
('r-b-t', 'it-') 1
('null', 'traÅ¼Å¼in') 5
('traÅ¼Å¼in', 'NOU

('pass', 'p-s-j') 3
('p-s-j', 'tat-') 1
('k-t-r', 'Ä¡ie') 1
("Ä¡-j-'", 'mraÅ¼Å¼an') 1
('r-Å¼-n', '.') 1
("''", '82') 5
('82', "''") 5
('null', 'jkun') 21
('jkun', 'KIEN') 21
('null', 'ikollu') 2
('ikollu', 'VERB-PSEU') 3
('n-q-s', ',') 4
('null', 'jkollu') 2
('jkollu', 'VERB-PSEU') 2
('k-b-r', ',') 12
('null', 'jgÄ§ix') 3
('jgÄ§ix', 'VERB') 3
('gÄ§-j-x', 'aÄ§jar') 1
('aÄ§jar', 'ADV') 2
('ADV', 'aÄ§jar') 2
("''", '83') 5
('83', "''") 5
('null', 'alternattiva') 10
('alternattiva', 'NOUN') 6
('NOUN', 'alternattiv') 12
('alternattiv', 'null') 18
('d-w-m', 'ifittex') 1
('ifittex', 'VERB') 1
('VERB', 'fittex') 5
('fittex', 'f-t-x') 5
('f-t-x', 'kif') 1
('null', 'jakkwista') 1
('jakkwista', 'VERB') 1
('VERB', 'akkwista') 3
('akkwista', 'null') 3
('null', 'jiÄ¡Ä¡enera') 1
('jiÄ¡Ä¡enera', 'VERB') 1
('VERB', 'Ä¡Ä¡enera') 1
('Ä¡Ä¡enera', 'null') 1
('null', 'skart') 2
('skart', 'ADJ') 1
('ADJ', 'skart') 1
('gÄ§-j-x', 'agÄ§ar') 1
('agÄ§ar', 'ADJ') 2
('ADJ', 'agÄ§ar') 2
('agÄ§ar', 'null') 2
('null',

('tixrid', 'NOUN') 1
('NOUN', 'tixrid') 1
('tixrid', 'x-r-d') 1
('x-r-d', 'mingÄ§ajr') 1
('null', 'Ä¡eni') 1
('Ä¡eni', 'NOUN') 1
('NOUN', 'Ä¡eni') 1
('Ä¡eni', 'null') 1
('null', 'mmodifikati') 1
('mmodifikati', 'PART-PASS') 1
('PART-PASS', 'immodifikat') 3
('immodifikat', 'null') 3
('null', 'pollini') 1
('pollini', 'NOUN') 1
('NOUN', 'pollini') 1
('pollini', 'null') 1
('null', 'inbit') 1
('inbit', 'NOUN') 1
('NOUN', 'inbid') 1
('inbid', 'null') 1
('null', 'spontanju') 1
('spontanju', 'NOUN') 1
('NOUN', 'spontanju') 1
('spontanju', 'null') 1
('s-t-gÄ§', 'jikkontaminaw') 1
('jikkontaminaw', 'VERB') 1
('VERB', 'ikkontamina') 2
('ikkontamina', 'null') 2
('null', 'gÄ§elieqi') 1
('gÄ§elieqi', 'NOUN') 1
('NOUN', 'gÄ§alqa') 1
('gÄ§alqa', 'null') 1
('null', 'biswit') 1
('biswit', 'PREP') 1
('PREP', 'biswit') 1
('biswit', 'null') 1
('null', 'pjantaÄ¡Ä¡juni') 5
('pjantaÄ¡Ä¡juni', 'NOUN') 5
('NOUN', 'pjantaÄ¡Ä¡jun') 5
('pjantaÄ¡Ä¡jun', 'null') 5
('null', 'kkontrollati') 1
('kkontrollati', 'PART-PA

('null', 'bnedmin') 8
('bnedmin', 'NOUN') 8
('null', 'taÄ‹Ä‹essahom') 1
('taÄ‹Ä‹essahom', 'VERB') 1
('VERB', 'aÄ‹Ä‹essa') 1
('aÄ‹Ä‹essa', 'null') 1
("''", '130') 3
('130', "''") 3
('>', '30') 1
('30', 'X-DIG') 1
('X-DIG', '30') 1
('30', 'null') 1
('s-j-b', 'taÄ§t') 1
('taÄ§t', 'PREP') 11
('PREP', 'taÄ§t') 11
('taÄ§t', 't-Ä§-t') 13
('t-Ä§-t', 'wiÄ‹Ä‹') 1
('b-Ä‹-Ä‹', 'l-') 11
('k-b-r', 'jiÄ¡i') 1
("Ä¡-j-'", 'l-') 4
('null', 'wÅ¼at') 1
('wÅ¼at', 'PART-PASS') 1
('PART-PASS', 'uÅ¼at') 1
('uÅ¼at', 'w-Å¼-j') 1
('w-Å¼-j', 'min-') 1
("''", '131') 3
('131', "''") 3
('>', 'Barra') 7
('Barra', 'ADV') 7
('ADV', 'Barra') 7
('Barra', 'null') 7
('null', 'ftit') 24
('ftit', 'QUAN') 26
('QUAN', 'ftit') 26
('ftit', 'null') 34
('null', 'qatriet') 1
('qatriet', 'NOUN') 1
('NOUN', 'qattara') 1
('qattara', 'q-t-r') 1
('q-t-r', 'li') 1
('null', 'gÄ§adajjar') 1
('gÄ§adajjar', 'NOUN') 1
('NOUN', 'gÄ§adira') 2
('gÄ§adira', 'null') 1
('null', '0.03') 1
('0.03', 'X-DIG') 1
('X-DIG', '0.03') 1
('0.03', 'null') 1
('

('reponsabbiltÃ', 'null') 2
('null', 'regolamenti') 13
('regolamenti', 'NOUN') 13
('NOUN', 'regolament') 15
('regolament', 'null') 15
('null', 'miktuba') 1
('miktuba', 'PART-PASS') 1
('PART-PASS', 'miktub') 3
('miktub', 'k-t-b') 3
('k-t-b', 'mill-') 1
('null', 'kollettivitÃ') 1
('kollettivitÃ', 'NOUN') 1
('NOUN', 'kollettivitÃ') 1
('kollettivitÃ', 'null') 1
('null', 'fiskali') 1
('fiskali', 'ADJ') 1
('ADJ', 'fiskali') 1
('fiskali', 'null') 1
('null', 'finanzjament') 2
('finanzjament', 'ADV') 3
('ADV', 'finanzjament') 3
('finanzjament', 'null') 3
("''", '149') 3
('149', "''") 3
('>', '4') 2
("''", '150') 3
('150', "''") 3
('m-x-j', 'tal-') 3
('null', 'propjetÃ') 4
("''", '151') 3
('151', "''") 3
('aÄ§Ä§ar', 'ADJ') 10
('ADJ', 'aÄ§Ä§ar') 10
('null', 'gÄ§aÄ‹-') 1
('gÄ§aÄ‹-', 'PREP-DEF') 1
('null', 'distributuri') 2
('distributuri', 'NOUN') 2
('NOUN', 'distributur') 2
('distributur', 'null') 2
("''", '152') 3
('152', "''") 3
('null', 'tinÄ§tieÄ¡') 1
('tinÄ§tieÄ¡', 'VERB') 1
('Ä§-j-Ä¡', 'Ä§i

('d-q-s', ',') 1
('null', 'dnub') 3
('dnub', 'NOUN') 3
('NOUN', 'dnub') 3
('dnub', 'd-n-b') 3
('d-n-b', 'u') 1
('null', 'lanqas') 4
('null', 'gÄ§andek') 1
('gÄ§andek', 'VERB-PSEU') 1
('null', 'taÄ§seb') 2
('taÄ§seb', 'VERB') 2
('Ä§-s-b', 'fuqu') 1
('f-w-q', ';') 2
('null', 'omosesswalitÃ') 1
('omosesswalitÃ', 'NOUN') 1
('NOUN', 'omosesswalitÃ') 1
('omosesswalitÃ', 'null') 1
('null', 'tabÃ¹') 2
('tabÃ¹', 'NOUN') 1
('NOUN', 'tabÃ¹') 1
('tabÃ¹', 'null') 2
('null', 'mara') 6
('mara', 'NOUN') 7
('null', 'tmur') 2
('tmur', 'VERB') 2
('m-w-r', "ma'") 2
('d-n-b', 'tripplu') 1
('tripplu', 'VERB') 1
('VERB', 'tripplu') 1
('tripplu', 'null') 1
('null', 'Il-') 9
('null', 'linja') 14
('linja', 'NOUN') 14
('null', 'Ä§adra') 9
('Ä§adra', 'NOUN') 8
('NOUN', 'aÄ§dar') 8
('aÄ§dar', 'Ä§-d-r') 9
('Ä§-d-r', "''") 5
('null', 'forsi') 5
('forsi', 'ADV') 5
('ADV', 'forsi') 6
('forsi', 'null') 6
('null', 'raÄ¡el') 3
('raÄ¡el', 'NOUN') 3
('null', 'imur') 2
('imur', 'VERB') 3
('d-n-b', 'doppju') 1
('doppju', 'AD

('reÄ¡istri', 'NOUN') 2
('NOUN', 'reÄ¡istru') 2
('reÄ¡istru', 'null') 2
('null', 'tirrifletti') 3
('tirrifletti', 'VERB') 3
('null', 'karattri') 12
('karattri', 'NOUN') 12
('NOUN', 'karattru') 12
('karattru', 'null') 12
('null', 'aspirazzjonijiet') 1
('aspirazzjonijiet', 'NOUN') 1
('NOUN', 'aspirazzjoni') 1
('aspirazzjoni', 'null') 1
('null', 'ambjenti') 1
('ambjenti', 'NOUN') 1
('>', 'Adiam') 3
('Adiam', 'NOUN-PROP') 5
('NOUN-PROP', 'Adiam') 5
('Adiam', 'null') 6
('null', 'refuÄ¡jati') 2
('refuÄ¡jati', 'PART-PASS') 2
('PART-PASS', 'refuÄ¡jat') 2
('refuÄ¡jat', 'null') 3
('null', 'novella') 2
('novella', 'NOUN') 2
('null', 'inkapaÄ‹itÃ') 1
('inkapaÄ‹itÃ', 'NOUN') 1
('NOUN', 'inkapaÄ‹itÃ') 1
('inkapaÄ‹itÃ', 'null') 1
('null', 'tesprimi') 1
('tesprimi', 'VERB') 1
('VERB', 'esprima') 2
('esprima', 'null') 2
('null', 'profondi') 1
('profondi', 'ADJ') 1
('ADJ', 'profond') 1
('profond', 'null') 1
('null', 'trawmatiÄ‹i') 1
('trawmatiÄ‹i', 'ADJ') 1
('ADJ', 'trawmatiku') 3
('trawmatiku', 'null')

('null', 'toqol') 1
('toqol', 'NOUN') 1
('NOUN', 'toqol') 1
('toqol', 't-q-l') 1
('t-q-l', 'tal-') 1
('null', 'organizzazzjoni') 52
('organizzazzjoni', 'NOUN') 52
('null', 'metikoluÅ¼a') 2
('metikoluÅ¼a', 'ADJ') 1
('ADJ', 'metikoluÅ¼') 1
('null', 'jagÄ§tuk') 1
('jagÄ§tuk', 'VERB') 1
('null', 'impressjoni') 2
('impressjoni', 'NOUN') 2
('NOUN', 'impressjoni') 2
('impressjoni', 'null') 2
('null', 'profondament') 1
('profondament', 'ADV') 1
('ADV', 'profondament') 1
('profondament', 'null') 1
('null', 'Å¼baljata') 1
('Å¼baljata', 'PART-PASS') 1
('k-t-b', 'stejjer') 2
('null', 'bÄ§alhom') 1
('bÄ§alhom', 'PREP-PRON') 1
('PREP-PRON', 'bÄ§al') 3
('Ä§-d-r', 'hemm') 2
('n-s-Ä¡', 'kumplessi') 1
('kumplessi', 'NOUN') 1
('NOUN', 'kumpless') 1
('kumpless', 'null') 3
('null', 'jixhdu') 2
('jixhdu', 'VERB') 2
('null', 'Ä§ila') 5
('Ä§ila', 'NOUN-PROP') 3
('NOUN-PROP', 'Ä§ila') 3
('null', 'jikteb') 1
('jikteb', 'VERB') 1
('k-t-b', ',') 3
('null', 'frammentazzjoni') 1
('frammentazzjoni', 'NOUN') 1
('NOUN

('d-w-r', "'il") 1
('null', 'Ä§utha') 2
('Ä§utha', 'NOUN') 2
('NOUN', 'Ä§u') 3
('Ä§u', 'null') 3
('null', 'belt') 3
('belt', 'NOUN') 3
('b-l-t', 'li') 1
('null', 'tafha') 1
('tafha', 'VERB') 1
("'-'-f", 'gÄ§ax') 1
('null', 'gÄ§exet') 1
('gÄ§exet', 'VERB') 1
('gÄ§-j-x', 'fiha') 2
('null', 'gÄ§addiet') 2
('gÄ§addiet', 'VERB') 2
('VERB', 'gÄ§adda') 4
('gÄ§adda', 'gÄ§-d-d') 2
('gÄ§-d-d', 'mill-') 1
('null', 'trawmatika') 2
('trawmatika', 'ADJ') 2
('null', 'attakk') 1
('attakk', 'NOUN') 1
('NOUN', 'attakk') 1
('attakk', 'null') 1
('null', 'terroristiku') 1
('terroristiku', 'ADJ') 1
('ADJ', 'terroristiku') 1
('terroristiku', 'null') 1
('null', 'jispiÄ‹Ä‹a') 2
('jispiÄ‹Ä‹a', 'VERB') 2
('VERB', 'spiÄ‹Ä‹a') 5
('spiÄ‹Ä‹a', 'null') 5
('null', 'biha') 2
('null', 'titgÄ§annaq') 1
('titgÄ§annaq', 'VERB') 1
('VERB', 'tgÄ§annaq') 1
('tgÄ§annaq', 'gÄ§-n-q') 1
('gÄ§-n-q', 'malflaxing') 1
('malflaxing', 'PART-PASS') 1
('PART-PASS', 'malflaxing') 1
('malflaxing', 'null') 1
('null', 'banju') 3
('banju', 'N

('psikoloÄ¡ika', 'ADJ') 1
('ADJ', 'psikoloÄ¡iku') 1
('psikoloÄ¡iku', 'null') 1
('null', 'jagÄ§mlulhom') 1
('jagÄ§mlulhom', 'VERB') 1
('gÄ§-m-l', 'jitilfu') 1
('t-l-f', 'wkoll') 1
('Ä¡-s-m', 'tagÄ§hom') 1
('null', 'partijiet') 9
('partijiet', 'NOUN') 9
('NOUN', 'parti') 9
('parti', 'null') 9
('null', 'jogÄ§Ä¡buhomx') 1
('jogÄ§Ä¡buhomx', 'VERB') 1
('VERB', 'gÄ§oÄ¡ob') 1
('gÄ§oÄ¡ob', 'gÄ§-Ä¡-b') 1
('gÄ§-Ä¡-b', 'isiru') 1
('s-j-r', 'bÄ§allikieku') 1
('bÄ§allikieku', 'CONJ-SUB') 1
('CONJ-SUB', 'bÄ§allikieku') 1
('bÄ§allikieku', 'null') 1
('null', 'mixtieqa') 1
('mixtieqa', 'PART-PASS') 1
('PART-PASS', 'mixtieq') 1
('mixtieq', 'x-w-q') 1
('x-w-q', 'illi') 1
('illi', 'COMP') 1
('s-t-gÄ§', 'jeÄ§ilsu') 1
('jeÄ§ilsu', 'VERB') 1
('VERB', 'Ä§eles') 2
('Ä§eles', 'Ä§-l-s') 2
('Ä§-l-s', 'minnha') 1
('minnha', 'PREP-PRON') 4
('null', 'Kristi') 2
('null', 'drawwiet') 1
('drawwiet', 'NOUN') 1
('NOUN', 'drawwa') 1
('drawwa', 'd-r-j') 1
('d-r-j', 'strambi') 1
('strambi', 'ADJ') 1
('ADJ', 'stramb') 3
('str

('null', 'jifilÄ§ux') 1
('jifilÄ§ux', 'VERB') 1
('f-l-Ä§', 'gÄ§alihom') 1
('null', 'fiduÄ‹ja') 4
('fiduÄ‹ja', 'NOUN') 4
('NOUN', 'fiduÄ‹ja') 4
('fiduÄ‹ja', 'null') 4
('null', 'Ä¡ieli') 1
('Ä¡ieli', 'ADV') 1
('ADV', 'Ä¡ie') 1
("Ä¡-j-'", 'antipatija') 1
('antipatija', 'NOUN') 1
('NOUN', 'antipatija') 1
('antipatija', 'null') 1
('null', 'lejhom') 1
('lejhom', 'PREP-PRON') 1
('PREP-PRON', 'lejn') 1
('null', 'Ä¡abra') 1
('Ä¡abra', 'NOUN') 1
('NOUN', 'Ä¡bir') 1
('Ä¡bir', 'Ä¡-b-r') 1
('Ä¡-b-r', "ta'") 1
('null', 'tixtieq') 1
('tixtieq', 'VERB') 1
('VERB', 'xtaq') 6
('xtaq', 'x-w-q') 6
('x-w-q', 'li') 1
('null', 'relaxed') 1
('relaxed', 'NOUN') 1
('NOUN', 'relaxed') 1
('relaxed', 'null') 1
('null', 'Ä§abibtha') 1
('Ä§abibtha', 'VERB') 1
('VERB', 'Ä§abib') 1
('null', 'jikkonÄ‹ernawhiex') 1
('jikkonÄ‹ernawhiex', 'VERB') 1
('VERB', 'ikkonÄ‹erna') 1
('ikkonÄ‹erna', 'null') 1
('null', 'jgÄ§addux') 1
('jgÄ§addux', 'VERB') 1
('VERB', 'gÄ§added') 2
('gÄ§added', 'gÄ§-d-d') 2
('gÄ§-d-d', 'minn') 2
('nul

('Ä§-d-m', 'il-') 3
('null', 'PODIE') 9
('PODIE', 'NOUN') 2
('NOUN', 'PODIE') 2
('PODIE', 'null') 9
('null', 'People') 1
('People', 'NOUN-PROP') 1
('NOUN-PROP', 'People') 1
('People', 'null') 1
('null', 'Organization') 2
('Organization', 'X-ENG') 2
('X-ENG', 'Organization') 2
('Organization', 'null') 2
('null', 'for') 2
('for', 'X-ENG') 2
('X-ENG', 'for') 2
('for', 'null') 2
('null', 'Develompent') 1
('Develompent', 'X-ENG') 1
('X-ENG', 'Develompent') 1
('Develompent', 'null') 1
('null', 'International') 8
('International', 'X-ENG') 5
('X-ENG', 'International') 5
('International', 'null') 8
('null', 'Export') 1
('Export', 'NOUN-PROP') 1
('NOUN-PROP', 'Export') 1
('Export', 'null') 1
('Ä§-w-r', 'li') 1
('null', 'jimlew') 1
('jimlew', 'VERB') 1
('VERB', 'mela') 1
('m-l-j', 'il-') 1
('Ä¦anut', 'NOUN') 10
('NOUN', 'Ä¦anut') 10
('null', 'naÄ§dem') 2
('naÄ§dem', 'VERB') 2
('Ä§-d-m', 'fih') 1
('>', 'Issa') 3
('Issa', 'ADV') 3
('ADV', 'Issa') 3
('Issa', 'null') 3
('null', 'nara') 1
('nara', 'V

('q-s-m', 'xejn') 1
('s-j-b', ',') 1
('Ä¡-w-w', 's-') 2
('null', 'timpjega') 1
('timpjega', 'VERB') 1
('VERB', 'impjega') 3
('impjega', 'null') 3
('null', '57') 1
('57', 'X-DIG') 2
('X-DIG', '57') 2
('57', 'null') 2
('null', 'gÄ§aÅ¼la') 3
('gÄ§aÅ¼la', 'NOUN') 3
('NOUN', 'gÄ§aÅ¼la') 3
('gÄ§aÅ¼la', 'null') 3
('null', 'tnixxif') 1
('tnixxif', 'NOUN') 1
('NOUN', 'tnixxif') 1
('tnixxif', 'n-x-f') 1
('n-x-f', 'u') 1
('null', 'impakkettar') 7
('impakkettar', 'NOUN') 7
('NOUN', 'impakkettar') 7
('impakkettar', 'null') 7
('Ä§-w-r', '.') 2
('null', 'iqanqlu') 1
('iqanqlu', 'VERB') 1
('VERB', 'qanqal') 2
('qanqal', 'q-n-q-l') 2
('q-n-q-l', 'l-') 1
('null', 'gÄ§ira') 1
('gÄ§ira', 'NOUN') 1
('NOUN', 'gÄ§ira') 1
('gÄ§ira', 'null') 1
('taÄ§riÄ¡', 'VERB') 3
('VERB', 'Ä§ruÄ¡') 3
('Ä§-r-Ä¡', 'tekniku') 1
('tekniku', 'ADJ') 2
('null', 'vaganza') 1
('vaganza', 'NOUN') 1
('v-g-j', ',') 1
('f-n-d', 'gÄ§all-') 2
('null', 'pensjoni') 1
('pensjoni', 'NOUN') 1
('NOUN', 'pensjoni') 1
('pensjoni', 'null') 1
('nul

('IÅ¼rael', 'NOUN-PROP') 1
('NOUN-PROP', 'IÅ¼rael') 1
('IÅ¼rael', 'null') 2
('null', 'tippjana') 1
('tippjana', 'VERB') 1
('null', 'Ä§idmietha') 1
('Ä§idmietha', 'NOUN') 1
('NOUN', 'Ä§idma') 1
('Ä§idma', 'null') 1
('null', 'tgÄ§inhom') 1
('tgÄ§inhom', 'VERB') 1
('gÄ§-j-n', 'jiÅ¼viluppaw') 1
('jiÅ¼viluppaw', 'VERB') 4
('null', 'ttejjeb') 4
('ttejjeb', 'VERB') 4
('t-j-b', 'il-') 5
('null', 'produttiva') 1
('produttiva', 'NOUN') 1
('NOUN', 'produttiv') 2
('null', 'tÄ§arriÄ¡hom') 1
('tÄ§arriÄ¡hom', 'VERB') 1
('VERB', 'Ä§arreÄ¡') 1
('Ä§arreÄ¡', 'Ä§-r-Ä¡') 1
('Ä§-r-Ä¡', 'fit-') 2
('m-x-j', 'u') 4
("Ä¡-j-'", 'organizzati') 1
('b-d-j', 'jipproduÄ‹u') 1
('null', 'jbiigÄ§u') 1
('jbiigÄ§u', 'VERB') 1
('b-j-gÄ§', 'il-') 1
('null', 'fond') 2
('b-j-d', 'kien') 1
('null', '$') 3
('$', 'X-PUN') 3
('X-PUN', '$') 3
('$', 'null') 3
('null', '1000') 1
('1000', 'X-DIG') 1
('X-DIG', '1000') 1
('1000', 'null') 1
('null', 'maqsum') 1
('maqsum', 'PART-PASS') 1
('PART-PASS', 'maqsum') 2
('maqsum', 'q-s-m') 2
('

('42', 'null') 1
('null', 'km') 1
('km', 'X-ABV') 1
('X-ABV', 'km') 1
('km', 'null') 1
('b-gÄ§-d', 'minn') 1
('null', 'Khulna') 1
('Khulna', 'NOUN-PROP') 1
('NOUN-PROP', 'Khulna') 1
('Khulna', 'null') 1
('null', 'xtrat') 1
('xtrat', 'VERB') 1
('x-r-j', 'mingÄ§andha') 1
('mingÄ§andha', 'PREP-PRON') 1
('PREP-PRON', 'mingÄ§and') 2
('null', 'Sir') 2
('Sir', 'NOUN-PROP') 2
('NOUN-PROP', 'Sir') 2
('Sir', 'null') 2
('null', 'Ltd') 2
('Ltd', 'X-ABV') 2
('X-ABV', 'Ltd') 2
('Ltd', 'null') 2
('Ä¡-w-w', 'Morbegno') 1
('Morbegno', 'NOUN-PROP') 1
('NOUN-PROP', 'Morbegno') 1
('Morbegno', 'null') 1
('null', 'Sondrio') 1
('Sondrio', 'NOUN-PROP') 2
('NOUN-PROP', 'Sondrio') 2
('Sondrio', 'null') 2
('null', 'Ä§arÄ¡et') 2
('Ä§arÄ¡et', 'VERB') 2
('Ä§-r-Ä¡', 'ukoll') 1
('null', 'katalgu') 1
('katalgu', 'NOUN') 1
('NOUN', 'katalgu') 1
('katalgu', 'null') 1
('null', 'kollaborazzjoni') 3
('kollaborazzjoni', 'NOUN') 3
('NOUN', 'kollaborazzjoni') 3
('kollaborazzjoni', 'null') 3
('qrib', 'NOUN') 3
('NOUN', 'qrib')

('null', 'TagÄ§lim') 1
('TagÄ§lim', 'NOUN') 1
('null', 'Investigazzjoni') 1
('Investigazzjoni', 'NOUN') 1
('NOUN', 'Investigazzjoni') 1
('Investigazzjoni', 'null') 1
('null', 'Azzjoni') 1
('Azzjoni', 'NOUN') 1
('NOUN', 'Azzjoni') 1
('Azzjoni', 'null') 1
('null', 'jagÄ§Ä¡nu') 1
('jagÄ§Ä¡nu', 'VERB') 1
('VERB', 'gÄ§aÄ¡en') 2
('gÄ§aÄ¡en', 'gÄ§-Ä¡-n') 2
('gÄ§-Ä¡-n', 'fil-') 1
('null', 'lezzjonijiet') 3
('lezzjonijiet', 'NOUN') 3
('NOUN', 'lezzjoni') 3
('lezzjoni', 'null') 3
('null', 'Drittijiet') 1
('Drittijiet', 'NOUN') 1
('null', 'Bidla') 1
('Bidla', 'NOUN') 1
('null', 'Klima') 1
('Klima', 'NOUN') 1
('NOUN', 'Klima') 1
('Klima', 'null') 1
('null', 'iwettqu') 1
('iwettqu', 'VERB') 1
('VERB', 'wettaq') 7
('w-t-q', 'dan') 2
('gÄ§-t-j', 'eÅ¼erÄ‹izzji') 1
('eÅ¼erÄ‹izzji', 'NOUN') 1
('NOUN', 'eÅ¼erÄ‹izzju') 1
('eÅ¼erÄ‹izzju', 'null') 1
('null', 'komprensjoni') 1
('komprensjoni', 'NOUN') 1
('NOUN', 'komprensjoni') 1
('komprensjoni', 'null') 1
('null', 'suÄ¡Ä¡etti') 2
('null', 'jonkella') 3
('jo

('null', 'te') 1
('te', 'NOUN') 1
('NOUN', 'te') 1
('te', 'null') 1
('null', 'kafÃ¨') 11
('kafÃ¨', 'NOUN') 12
('NOUN', 'kafÃ¨') 12
('kafÃ¨', 'null') 12
('k-l-l', 'Ä‹ans') 2
('Ä‹ans', 'NOUN') 3
('NOUN', 'Ä‹ans') 3
('Ä‹ans', 'null') 3
('null', 'iduqu') 1
('iduqu', 'VERB') 1
('VERB', 'daq') 2
('daq', 'd-w-q') 2
('d-w-q', 'ftit') 1
('k-s-b', 'informazzjoni') 1
('null', 'fuljetti') 1
('fuljetti', 'NOUN') 2
('NOUN', 'fuljett') 2
('fuljett', 'null') 2
('null', 'tqassmu') 2
('tqassmu', 'VERB') 2
('q-s-m', 'waqt') 1
('w-q-t', 'din') 1
('null', 'wirja') 1
('wirja', 'NOUN') 2
('NOUN', 'wirja') 3
('wirja', "r-'-j") 3
("r-'-j", '.') 1
('null', 'wassalna') 1
('wassalna', 'VERB') 1
('null', 'messaÄ¡Ä¡') 3
('messaÄ¡Ä¡', 'NOUN') 3
('NOUN', 'messaÄ¡Ä¡') 3
('messaÄ¡Ä¡', 'null') 3
('Ä§-l-q', 'kuxjenza') 1
('null', 'ngÄ§aqdu') 1
('ngÄ§aqdu', 'VERB') 1
('VERB', 'ngÄ§aqad') 1
('ngÄ§aqad', 'gÄ§-q-d') 1
('gÄ§-q-d', 'magÄ§hom') 1
('null', 'edukattiva') 1
('edukattiva', 'ADJ') 1
('null', 'Journey') 1
('Journey',

('ogÄ§la', 'ADJ') 4
('null', 'standards') 2
('standards', 'NOUN') 2
('>', '5') 1
('>', 'Prezz') 1
('Prezz', 'NOUN') 1
('NOUN', 'Prezz') 1
('Prezz', 'null') 1
("Ä¡-j-'", 'miftiehem') 1
('f-h-m', 'mill-') 1
('Ä§-l-s', 'Ä¡ust') 1
('null', 'kompetittiv') 2
('kompetittiv', 'ADJ') 2
('ADJ', 'kompetittiv') 3
('kompetittiv', 'null') 3
('null', 'strutturi') 2
('strutturi', 'NOUN') 3
('NOUN', 'struttura') 3
('struttura', 'null') 3
('null', 'deÄ‹iÅ¼joni') 1
('deÄ‹iÅ¼joni', 'NOUN') 1
('w-Å¼-j', 'mill-') 2
('n-q-s', 'possibbli') 1
('null', 'tfisser') 2
('f-s-r', 'li') 4
('Ä§-l-s', 'huwa') 3
('null', 'soÄ‹jalment') 2
('soÄ‹jalment', 'ADV') 2
('ADV', 'soÄ‹jalment') 2
('soÄ‹jalment', 'null') 2
('null', 'aÄ‹Ä‹ettabbli') 1
('aÄ‹Ä‹ettabbli', 'ADJ') 1
('ADJ', 'aÄ‹Ä‹ettabbli') 1
('aÄ‹Ä‹ettabbli', 'null') 1
('q-j-s', 'Ä¡ust') 1
('null', 'nfushom') 5
('nfushom', 'VERB') 5
('VERB', 'nnifsu') 5
('null', 'bbaÅ¼at') 1
('bbaÅ¼at', 'PART-PASS') 1
('f-w-q', 'prinÄ‹ipju') 1
('prinÄ‹ipju', 'NOUN') 1
('Ä§-l-s', 'indaq

('analizza', 'null') 1
('null', 'modi') 3
('null', 'Å¼viluppa') 3
('Å¼viluppa', 'VERB') 3
('VERB', 'iÅ¼viluppa') 3
('iÅ¼viluppa', 'null') 4
('null', 'jagÄ§rafx') 1
('jagÄ§rafx', 'VERB') 1
('gÄ§-r-f', 'kemm') 2
('null', 'inbidlu') 1
('inbidlu', 'VERB') 1
('b-d-l', 'l-') 1
('>', 'FaÄ‹li') 1
('FaÄ‹li', 'ADJ') 1
('ADJ', 'FaÄ‹li') 1
('FaÄ‹li', 'null') 1
('null', 'jagÄ§raf') 1
('jagÄ§raf', 'VERB') 1
('gÄ§-j-d', 'sallum') 1
('null', 'deskrizzjoni') 1
('deskrizzjoni', 'NOUN') 1
('NOUN', 'deskrizzjoni') 1
('deskrizzjoni', 'null') 1
('t-j-b', "ta'") 1
('null', 'dinamiku') 1
('dinamiku', 'ADJ') 1
('ADJ', 'dinamiku') 1
('dinamiku', 'null') 1
('null', 'jgÄ§addi') 1
('jgÄ§addi', 'VERB') 1
('gÄ§-d-j', 'minn') 2
('null', 'kunflitti') 3
('kunflitti', 'NOUN') 3
('NOUN', 'kunflitti') 3
('kunflitti', 'null') 3
('d-h-r', 'xi') 1
('null', 'kontradittorju') 1
('kontradittorju', 'ADJ') 1
('ADJ', 'kontradittorju') 1
('kontradittorju', 'null') 1
('null', 'konvint') 1
('konvint', 'PART-PASS') 1
('PART-PASS', 'ko

('twettieq', 'NOUN') 1
('NOUN', 'twettieq') 1
('twettieq', 'null') 1
('politika', 'ADJ') 3
('ADJ', 'politika') 3
('null', 'strateÄ¡iji') 2
('strateÄ¡iji', 'NOUN') 2
('null', 'jseÄ§Ä§') 2
('jseÄ§Ä§', 'VERB') 2
('s-Ä§-Ä§', 'fi') 1
('null', 'jvarjaw') 1
('jvarjaw', 'VERB') 1
('VERB', 'varja') 1
('varja', 'null') 1
('null', 'grad') 1
('grad', 'NOUN') 1
('NOUN', 'grad') 1
('grad', 'null') 1
('l-Ä§-q', ',') 1
('null', 'parteÄ‹ipanti') 6
('parteÄ‹ipanti', 'NOUN') 6
('NOUN', 'parteÄ‹ipant') 6
('parteÄ‹ipant', 'null') 6
('Ä§-d-m', "ta'") 1
('r-j-d', 'joffri') 1
('Ä§-l-s', 'li') 1
('null', 'jippermetti') 1
('jippermetti', 'VERB') 1
('VERB', 'ippermetta') 2
('ippermetta', 'null') 2
('null', 'dinjituÅ¼') 1
('dinjituÅ¼', 'ADJ') 1
('ADJ', 'dinjituÅ¼') 1
('dinjituÅ¼', 'null') 1
('>', 'Idealment') 1
('Idealment', 'ADV') 1
('ADV', 'Idealment') 1
('Idealment', 'null') 1
('f-h-m', 'bejn') 1
('null', 'impoÅ¼izzjoni') 1
('impoÅ¼izzjoni', 'NOUN') 1
('NOUN', 'impoÅ¼izzjoni') 1
('impoÅ¼izzjoni', 'null') 1
('n

('null', 'joÄ§loq') 1
('joÄ§loq', 'VERB') 1
('null', 'minfloku') 1
('minfloku', 'PREP-PRON') 1
('PREP-PRON', 'minflok') 1
('Ä¡-d-d', "ta'") 3
('null', 'jitratta') 1
('jitratta', 'VERB') 1
('null', 'wiÄ‹Ä‹') 2
('gÄ§-t-j', 'iÅ¼jed') 1
('null', 'milli') 4
('null', 'produttivitÃ') 4
('produttivitÃ', 'NOUN') 4
('NOUN', 'produttivitÃ') 4
('produttivitÃ', 'null') 4
('null', 'ssib') 1
('ssib', 'VERB') 2
('f-w-q', 'xkaffa') 1
('xkaffa', 'NOUN') 1
('NOUN', 'xkaffa') 1
('xkaffa', 'null') 1
('null', 'supermarket') 1
('supermarket', 'NOUN') 1
('NOUN', 'supermarket') 1
('supermarket', 'null') 1
('null', 'wettquh') 1
('wettquh', 'VERB') 1
('w-t-q', 'u') 1
('gÄ§-j-x', 'fihom') 1
('s-j-r', 'joffri') 1
('null', 'kuxjenza') 3
('null', 'wassal') 2
('wassal', 'VERB') 2
('null', 'tipi') 2
('tipi', 'NOUN') 2
('NOUN', 'tip') 5
('tip', 'null') 5
('s-Ä§-b', '.') 1
('null', 'twieled') 1
('twieled', 'VERB') 1
('VERB', 'wieled') 1
('wieled', 'w-l-d') 1
('w-l-d', 'biex') 1
('s-Ä§-b', 'ekonomika') 1
("Ä¡-j-'", 'iffu

('politiku', 'ADJ') 1
('null', 'manifestazzjonijiet') 1
('manifestazzjonijiet', 'NOUN') 1
('NOUN', 'manifestazzjoni') 1
('manifestazzjoni', 'null') 1
('null', 'Millennium') 1
('Millennium', 'NOUN') 1
('NOUN', 'Millennium') 1
('Millennium', 'null') 1
('null', 'BÄ§ala') 1
('null', 'jinfetaÄ§') 1
('jinfetaÄ§', 'VERB') 1
('f-t-Ä§', 'Ä‹entru') 1
('null', 'apparti') 1
('apparti', 'ADV') 1
('ADV', 'apparti') 1
('apparti', 'null') 1
('null', 'Ä§jata') 1
('Ä§jata', 'NOUN') 1
('NOUN', 'Ä§jata') 1
('Ä§jata', 'null') 1
('q-t-gÄ§', 'tad-') 1
('null', 'drapp') 1
('drapp', 'NOUN') 1
('NOUN', 'drapp') 2
('drapp', 'null') 2
('>', 'Qalulna') 1
('Qalulna', 'VERB') 1
('null', '200') 1
('200', 'X-DIG') 1
('X-DIG', '200') 1
('200', 'null') 1
("r-'-j", 'ix-') 1
('null', 'xewqa') 2
('xewqa', 'NOUN') 2
('null', 'jinktibu') 1
('jinktibu', 'VERB') 1
('k-t-b', 'biex') 1
('Ä§-r-Ä¡', 'Ä§alli') 1
('Ä§alli', 'CONJ-SUB') 1
('CONJ-SUB', 'Ä§alli') 1
('Ä§alli', 'null') 1
('s-t-gÄ§', 'isibu') 1
('isibu', 'VERB') 1
('s-j-b

('istandards', 'null') 3
('s-t-gÄ§', 'tuÅ¼a') 1
("''", '171') 1
('171', "''") 1
("''", '172') 1
('172', "''") 1
('>', 'Transfair') 1
('null', 'Fairtrade') 2
('Fairtrade', 'NOUN-PROP') 2
('NOUN-PROP', 'Fairtrade') 2
('Fairtrade', 'null') 2
('null', 'Italia') 2
('Italia', 'NOUN-PROP') 2
('NOUN-PROP', 'Italia') 2
('Italia', 'null') 2
('inizzjattiva', 'ADJ') 1
('ADJ', 'inizzjattiva') 1
('Taljana', 'NOUN') 1
('NOUN', 'taljan') 1
('FLO', 'X-ABV') 2
('X-ABV', 'FLO') 2
("''", '173') 1
('173', "''") 1
('null', 'biÄ‹-') 1
('biÄ‹-', 'PREP-DEF') 2
('null', 'jinkitbu') 1
('jinkitbu', 'VERB') 1
('k-t-b', "f'") 1
('null', 'apposta') 1
('apposta', 'ADV') 1
('ADV', 'apposta') 1
('apposta', 'null') 1
('null', 'jinqasmu') 1
('jinqasmu', 'VERB') 1
('q-s-m', 'skond') 1
("''", '174') 1
('174', "''") 1
('x-w-q', 'jbigÄ§u') 1
('b-j-gÄ§', 'prodotti') 1
('null', 'liÄ‹enzja') 3
('liÄ‹enzja', 'NOUN') 3
("''", '175') 1
('175', "''") 1
('>', 'â€') 1
('s-t-gÄ§', 'gÄ§aldaqstant') 1
("''", '176') 1
('176', "''") 1
('n

('w-Å¼-j', 'ukoll') 1
('k-b-r', 'malajr') 1
('w-r-j', ',') 1
("Ä¡-j-'", 'mormija') 1
('mormija', 'PART-PASS') 1
('PART-PASS', 'rema') 1
('rema', 'r-m-j') 2
('r-m-j', 'barra') 1
("''", '206') 1
('206', "''") 1
('null', 'eÅ¼empji') 1
('eÅ¼empji', 'NOUN') 1
('null', 'tinvadi') 1
('tinvadi', 'VERB') 1
('Ä§-s-r', '.') 2
("''", '207') 1
('207', "''") 1
('r-b-t', 'mas-') 1
('mas-', 'PREP-DEF') 1
('null', 'sostnew') 1
('sostnew', 'VERB') 1
('null', 'oppoÅ¼izzjoni') 2
('oppoÅ¼izzjoni', 'NOUN') 2
('NOUN', 'oppoÅ¼izzjoni') 2
('oppoÅ¼izzjoni', 'null') 2
('null', 'effettiva') 1
('effettiva', 'ADJ') 1
("''", '208') 1
('208', "''") 1
('w-l-d', 'wkoll') 1
("''", '209') 1
('209', "''") 1
('null', 'maÄ§duma') 3
('maÄ§duma', 'PART-PASS') 3
('null', 'telimina') 1
('telimina', 'VERB') 1
('VERB', 'elimina') 2
('elimina', 'null') 2
('gÄ§-t-j', 'prezz') 1
('q-gÄ§-d', 'l-') 1
('null', 'tagÄ§om') 1
('tagÄ§om', 'VERB') 1
('q-b-l', 'l-') 1
('null', 'ingrossa') 1
('ingrossa', 'NOUN') 1
('NOUN', 'ingrossa') 1
('ing

("''", '248') 1
('248', "''") 1
('null', "jibqa'") 1
('b-q-gÄ§', 'l-') 2
('null', 'insettiÄ‹idi') 1
('insettiÄ‹idi', 'NOUN') 1
("''", '249') 1
('249', "''") 1
('w-s-l', 'gÄ§at-') 2
('null', 'inettiÄ‹isidi') 1
('inettiÄ‹isidi', 'NOUN') 1
('NOUN', 'inettiÄ‹isidi') 1
('inettiÄ‹isidi', 'null') 1
('null', 'Ä¡eneriku') 1
('Ä¡eneriku', 'NOUN') 1
('NOUN', 'Ä¡eneriku') 1
('Ä¡eneriku', 'null') 1
('s-t-gÄ§', 'jikbru') 1
('q-b-l', 'ftit') 1
('q-j-s', "ta'") 1
("''", '250') 1
('250', "''") 1
('null', 'studji') 1
('studji', 'NOUN') 1
('null', 'jaqblu') 1
('jaqblu', 'VERB') 1
('q-b-l', 'almenu') 1
('almenu', 'ADV') 2
('ADV', 'almenu') 2
('almenu', 'null') 2
('f-w-q', 'punt') 2
('gÄ§-t-j', 'ebda') 2
("''", '251') 1
('251', "''") 1
('Ä§-w-f', 'jiÄ¡u') 1
("Ä¡-j-'", 'mÄ‹aÄ§Ä§da') 1
('mÄ‹aÄ§Ä§da', 'PART-PASS') 2
('PART-PASS', 'mÄ‹aÄ§Ä§ad') 2
('mÄ‹aÄ§Ä§ad', 'c-Ä§-d') 2
('c-Ä§-d', 'mid-') 1
('null', 'jissieÄ§eb') 1
('jissieÄ§eb', 'VERB') 1
('n-q-s', 'totali') 1
('null', 'superviÅ¼joni') 1
('superviÅ¼joni', 

('tirrigwarda', 'VERB') 1
('VERB', 'rrigwarda') 1
('rrigwarda', 'null') 1
('null', 'joÄ§orÄ¡u') 1
('null', 'jipproduÄ‹uhom') 1
('jipproduÄ‹uhom', 'VERB') 1
('VERB', 'pproduÄ‹a') 1
('pproduÄ‹a', 'null') 1
('Ä', 'aladarba') 2
('aladarba', 'NOUN') 1
('NOUN', 'Ä') 2
('aladarba', 'null') 1
('null', 'mhemmx') 1
('mhemmx', 'HEMM') 1
('null', 'sodisfaÄ‹enti') 1
('sodisfaÄ‹enti', 'ADJ') 1
('ADJ', 'sodisfaÄ‹enti') 1
('sodisfaÄ‹enti', 'null') 1
('null', 'tiÄ¡Ä¡enera') 1
('tiÄ¡Ä¡enera', 'VERB') 1
('VERB', 'iÄ¡Ä¡enera') 1
('iÄ¡Ä¡enera', 'null') 1
('null', 'stabbiltÃ') 1
('stabbiltÃ', 'NOUN') 1
('null', 'reÄ¡junijiet') 1
('reÄ¡junijiet', 'NOUN') 1
('NOUN', 'reÄ¡jun') 1
('reÄ¡jun', 'null') 1
('null', 'ibda') 1
('ibda', 'VERB') 1
('Ä§-s-b', 'fil-') 1
('null', 'nillimitaw') 1
('nillimitaw', 'VERB') 1
('VERB', 'illimita') 2
('illimita', 'null') 2
('null', 'irwieÄ§na') 1
('irwieÄ§na', 'VERB') 1
('VERB', 'ruÄ§') 1
('ruÄ§', 'r-w-Ä§') 1
('r-w-Ä§', 'gÄ§all-') 1
('null', 'karri') 1
('karri', 'NOUN') 1
('NOUN'

('null', 'ekomija') 2
('ekomija', 'NOUN') 2
('null', 'Å¼vantaÄ¡Ä¡jata') 1
('Å¼vantaÄ¡Ä¡jata', 'PART-PASS') 1
('PART-PASS', 'Å¼vantaÄ¡Ä¡jat') 1
('Å¼vantaÄ¡Ä¡jat', 'null') 1
('f-q-r', 'ftit') 1
('null', 'negozjati') 2
('negozjati', 'NOUN') 2
('NOUN', 'negozjat') 2
('negozjat', 'null') 2
('f-h-m', "f'") 1
('null', 'GATS') 1
('GATS', 'X-ABV') 1
('X-ABV', 'GATS') 1
('GATS', 'null') 1
('null', 'Ä§eles') 1
('Ä§eles', 'VERB') 1
('Ä§-l-s', 'is-') 1
('s-Ä§-Ä§', 'saru') 1
('s-j-r', 'prodotti') 1
('null', 'taqlib') 1
('taqlib', 'NOUN') 1
('NOUN', 'taqlib') 1
('taqlib', 'q-l-b') 1
('q-l-b', 'tas-') 1
('null', 'kompetituri') 1
('kompetituri', 'NOUN') 1
('NOUN', 'kompetitur') 1
('kompetitur', 'null') 1
('b-r-j', "jista'") 1
('s-t-gÄ§', 'jbaxxi') 1
('jbaxxi', 'VERB') 1
('b-x-j', 'l-') 2
('null', 'tifrik') 1
('tifrik', 'NOUN') 1
('NOUN', 'tifrik') 1
('tifrik', 'f-r-k') 1
('f-r-k', 'tal-') 1
('null', 'konvenjenza') 1
('konvenjenza', 'NOUN') 1
('NOUN', 'konvenjenza') 1
('konvenjenza', 'null') 1
('null', 

('null', 'dgÄ§ajjef') 1
('dgÄ§ajjef', 'ADJ') 1
('ADJ', 'dgÄ§ajjef') 1
('dgÄ§ajjef', 'd-gÄ§-f') 1
('d-gÄ§-f', ',') 1
('null', 'kompliÄ‹i') 1
('kompliÄ‹i', 'ADJ') 1
('ADJ', 'kompliÄ‹i') 1
('kompliÄ‹i', 'null') 1
('>', 'JistgÄ§u') 1
('JistgÄ§u', 'VERB') 1
('s-t-gÄ§', 'il-') 1
('>', "Tista'") 1
("Tista'", 'VERB') 1
('s-t-gÄ§', 'azjenda') 1
('null', 'teÅ¼isti') 2
('teÅ¼isti', 'VERB') 2
('>', 'Le') 1
('Le', 'INT') 1
('INT', 'Le') 1
('Le', 'null') 1
('>', 'Lanqas') 1
('Lanqas', 'FOC') 1
('FOC', 'Lanqas') 1
('Lanqas', 'null') 1
('s-t-gÄ§', 'jwelled') 1
('jwelled', 'VERB') 1
('VERB', 'welled') 1
('welled', 'w-l-d') 1
('w-l-d', 'azjenda') 1
('s-t-gÄ§', 'jiggarantixxi') 1
('jiggarantixxi', 'VERB') 1
('null', 'limitati') 1
('limitati', 'PART-PASS') 1
('s-t-gÄ§', 'jiffirma') 1
('jiffirma', 'VERB') 1
('f-h-m', 'kummerÄ‹jali') 1
('null', 'jillimitaw') 1
('jillimitaw', 'VERB') 1
('null', 'jÅ¼omm') 1
('Å¼-m-m', 'taÄ§t') 1
('t-Ä§-t', 'kontroll') 1
('s-j-r', 'qawwija') 1
('q-w-j', 'l-') 1
('n-Ä§-j', 'tar

('trejdunjins', 'NOUN') 1
('attivi', 'NOUN') 1
('NOUN', 'attiv') 1
('null', 'Ä¡imagÄ§tejn') 1
('Ä¡imagÄ§tejn', 'NOUN') 1
('Ä¡-m-gÄ§', 'Å¼orna') 1
('Å¼-w-r', 'diversi') 1
('null', 'intervistajna') 1
('intervistajna', 'VERB') 1
('VERB', 'intervista') 1
('gÄ§-t-j', 'ftit') 1
('Ä§-j-l', 'tas-') 1
('null', 'Ä§arxa') 1
('Ä§arxa', 'NOUN') 1
('NOUN', 'aÄ§rax') 1
('aÄ§rax', 'Ä§-r-x') 1
('Ä§-r-x', 'li') 1
('Ä§-d-m', 'fiha') 2
('x-r-j', 'aÄ§na') 1
('null', 'magÄ§Å¼ul') 1
('magÄ§Å¼ul', 'PART-PASS') 1
('gÄ§-Å¼-l', 'gÄ§all-') 1
('Isvezja', 'NOUN-PROP') 4
('NOUN-PROP', 'Isvezja') 4
('Isvezja', 'null') 4
("'-Ä§-d", 'qatgÄ§a') 1
('qatgÄ§a', 'NOUN') 1
('NOUN', 'qatgÄ§a') 1
('qatgÄ§a', 'q-t-gÄ§') 1
('q-t-gÄ§', ':') 1
('null', 'qalziet') 1
('null', 'mibjugÄ§') 1
('mibjugÄ§', 'PART-PASS') 1
('PART-PASS', 'mibjugÄ§') 2
('mibjugÄ§', 'b-j-gÄ§') 2
('b-j-gÄ§', "f'") 1
('s-w-j', 'iktar') 1
('null', 'jirnexxilek') 1
('jirnexxilek', 'VERB') 1
('gÄ§-j-x', 'sew') 1
('s-t-gÄ§', 'jagÄ§mlu') 2
('null', 'Clean') 3
('Cle

('PART-PASS', 'gÄ§abba') 1
('gÄ§abba', 'gÄ§-b-j') 1
('gÄ§-b-j', 'bil-') 1
('b-d-j', 'attivitÃ') 1
('null', 'inqajmu') 1
('inqajmu', 'VERB') 1
('q-j-m', 'kuxxjenza') 1
('kuxxjenza', 'NOUN') 1
('NOUN', 'kuxxjenza') 1
('kuxxjenza', 'null') 1
('null', 'Afrikan') 1
('Afrikan', 'NOUN-PROP') 1
('NOUN-PROP', 'Afrikan') 1
('Afrikan', 'null') 1
('b-d-j', 'niÄ¡bru') 1
('niÄ¡bru', 'VERB') 1
('null', 'edukattiv') 1
('edukattiv', 'ADJ') 1
('null', 'sanitarju') 1
('sanitarju', 'NOUN') 1
('NOUN', 'sanitarju') 1
('sanitarju', 'null') 1
('null', 'spiÄ‹Ä‹at') 1
('spiÄ‹Ä‹at', 'VERB') 1
('null', 'telqet') 1
('telqet', 'VERB') 1
('t-l-q', 'mit-') 1
('Angola', 'NOUN') 4
('NOUN', 'angolu') 4
('angolu', 'null') 4
('null', 'Ä§dimna') 1
('Ä§dimna', 'VERB') 1
('null', 'Ministeru') 1
('Ministeru', 'NOUN') 1
('NOUN', 'Ministeru') 1
('Ministeru', 'null') 1
('null', 'Affarijiet') 1
('Affarijiet', 'NOUN') 1
('gÄ§-j-n', 'lir-') 1
('null', 'veterani') 1
('veterani', 'ADJ') 1
('ADJ', 'veteran') 1
('veteran', 'null') 1
('

('k-l-l', 'tkun') 1
('>', 'IÅ¼jed') 1
('IÅ¼jed', 'ADV') 1
('iÅ¼jed', 'Å¼-j-d') 1
('Å¼-j-d', 'ma') 1
('null', 'qorbot') 1
('qorbot', 'VERB') 1
('VERB', 'qorob') 1
('qorob', 'q-r-b') 1
('q-r-b', 'id-') 1
('null', 'data') 1
('data', 'NOUN') 1
('NOUN', 'data') 1
('data', 'null') 1
('t-l-q', ',') 1
('null', 'bdejt') 1
('bdejt', 'VERB') 1
('b-d-j', 'inÄ§ossni') 1
('inÄ§ossni', 'VERB') 1
('Ä§-s-s', 'eÄ‹itata') 1
('eÄ‹itata', 'PART-PASS') 1
('PART-PASS', 'eÄ‹itat') 1
('eÄ‹itat', 'null') 1
('null', 'beÅ¼gÄ§ana') 1
('beÅ¼gÄ§ana', 'NOUN') 1
('NOUN', 'beÅ¼gÄ§an') 1
('beÅ¼gÄ§an', 'b-Å¼-gÄ§') 1
('b-Å¼-gÄ§', 'wkoll') 1
('>', 'Kelli') 1
('Kelli', 'VERB-PSEU') 1
('k-l-l', 'rasi') 1
('r-j-s', 'ddur') 1
('ddur', 'VERB') 2
('d-w-r', 'bil-') 1
('null', 'tekniku') 1
('>', 'Tlaqna') 1
('Tlaqna', 'VERB') 1
('t-l-q', 'minn') 1
('null', '16') 1
('16', 'X-DIG') 1
('X-DIG', '16') 1
('16', 'null') 1
('Ä§-j-n', 'lokali') 1
('h-m-m', 'konna') 1
('null', 'Helsingor') 1
('Helsingor', 'NOUN-PROP') 1
('NOUN-PROP', 'Hels

In [6]:
def Split(corpus):
    
    corpus = open(corpus, "r")
    
    for line in corpus:
        
        words = np.array(line.split())
        
    train, test = train_test_split(words, shuffle = False)
    
    return train, test

# Part 2 - Building a Language Model

## Vanilla

In [7]:
def VanillaUnigram(train):
    
    model = Counter(train)
    
    for word in model:
        model[word] = model[word]/len(train)
        
    return model

def VanillaBigram(train):
    
    model = Counter([(word, train[i + 1]) for i, word in enumerate(train[:-1])])
    counter = Counter(train)
    
    for word in model:
        model[word] = model[word]/counter[word[0]]
        
    return model

def VanillaTrigram(train):
    
    bigram = Counter([(word, train[i + 1]) for i, word in enumerate(train[:-1])])
    trigram = Counter([(word, train[i + 1], train[i + 2]) for i, word in enumerate(train[:-2])])
    
    for word in trigram:
        trigram[word] = trigram[word]/bigram[(word[0], word[1])]
        
    return trigram

## Laplace 

In [8]:
def LaplaceUnigram(train):
    
    model = Counter(train)
    
    for word in model:
        model[word] = (model[word]+1)/len(train)
        
    return model

## UNK

In [9]:
def UNKUnigram(train):
    
    counter = Counter(train)
    model = {}
    model["<UNK>"] = 0
    
    for word in counter:
        if counter[word] == 1:
            model["<UNK>"] += 1
            
        else:
            model[word] = counter[word]
            
    for word in model:
        model[word] = model[word]/len(train)
        
    return model

def UNKBigram(train):
    
    unigram = UNKUnigram(train)
    
    for i, word in enumerate(train):
        if not (word in unigram):
            train[i] = "<UNK>"
            
    return VanillaBigram(train)

def UNKTrigram(train):
    
    unigram = UNKUnigram(train)
    
    for i, word in enumerate(train):
        if not (word in unigram):
            train[i] = "<UNK>"
            
    return VanillaTrigram(train)

## Probability

In [10]:
def UnigramProbability(unigram, sentence, word):
    return unigram[word]

def BigramProbability(bigram, sentence, word):
    if (sentence[-1], word) in bigram:
        return bigram[sentence[-1],word]
    
    else:
        return 0
    
def TrigramProbability(trigram, sentence, word): 
    if (sentence[-2],sentence[-1], word) in trigram:
        return trigram[sentence[-2],sentence[-1],word]
    
    else:
        return 0

## Generate

In [2]:
def Generate(train, bigram, sentence, last = "", count = None):
    
    print("Via What Model would you like to Generate?\n")
    choice = input("Enter Model:")
     
    print("Generating via " + choice + "...")
    
    if choice is "Laplace":
        if (count != 0 and (sentence[-1] != last)):
            
            counter = Counter(train)
            bigrams = {}
            
            for word in counter:
                x = (sentence[-1], word)
                if x in bigram.keys():
                    bigram[sentence[-1],word] = (bigram[sentence[-1], word] +1)/(counter[sentence[-1]] + len(counter))
                    
                else:
                    bigrams[sentence[-1], word] = 1/(counter[sentence[-1]] + len(counter))
                    
            weights = np.array(list(bigram.values()))
            norm_weights = weights/np.sum(weights)
            
            resample = np.random.multinomial(1, norm_weights)
            key = list(resample).index(1)
            value = list(bigram.keys())[key]
            
            sentence.append(value[1])
            
            if count != None:
                Generate(train, bigram, sentence, last, count -1)
                
            else:
                Generate(train, bigram, sentence, last)
                
        return sentence
        
    elif choice is "UNK":
        
        
    elif choice is "Vanilla":
        
        
    else:
        print("Invalid Choice\n")

  if choice is "Laplace":
  elif choice is "UNK":


SyntaxError: 'break' outside loop (<ipython-input-2-242b316673f4>, line 41)