In [19]:
# ** = Change location if needed
# Weight system is needed: on tags and title and for LDA to pass to LSI

In [10]:
# Seting up the environment:
# Reunite all the text file with the news in a folder called 'Analysis'
# Set up the basic path

basic_path = '/root/Desktop/' # **
path = basic_path + 'Analysis'

folders = ['/Il Corriere - Sample'] 
nick_names = ['/Corr']


choose = 0 # chose the number that represent the journal

actual_journal = [path +  folders[choose], nick_names[choose]]
print(actual_journal)

['/root/Desktop/Analysis/Il Corriere - Sample', '/Corr']


In [11]:
# Complete Analysis
# -----------------------------
# Import library

import re
import ast
import json
import nltk
import time
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

In [12]:
# Data loading

data = []
with open(actual_journal[0] + '.txt') as f:
    for line in f:
        data.append(json.loads(line, encoding='utf8'))
#        print(line)

        
collection_testi = []
for i in range(0, len(data)):
    if len(str(data[i]['text'])) > 10:  # Delete the news with no text
        collection_testi.append(str(data[i]['title']) + str(data[i]['text']) + str(data[i]['tags']) + str(data[i]['resume']))
        
    

In [13]:
for i in range(0, len(collection_testi)):
    f= open(actual_journal[0] + actual_journal[1] + "%s.txt" %str(i) , "w+") 
    f.write(collection_testi[i])
    f.close()


In [17]:
nltk.download("stopwords")
stop_words = set(stopwords.words('italian')) 
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
# Routine for cleaning documents:
def clean(path):
    with  open(path, 'r') as to_clean:
        one_line = ''
        for string in to_clean.readlines():
            one_line += string.lower()
    to_clean.close()
    
    # Special espressions of html format
    to_replace = ['\\n','\\t','\\r', '\\', '&nbsp'] 
    
    for item in to_replace:
        one_line = one_line.replace(item,' ') 

    cleaned = ' '.join(word for word in one_line.split() if len(word)>1)
    
    # All other special characters
    definitive = re.sub('[^a-zA-Zàéòùè]', ' ', cleaned)
    
    # Just the stopwords remain
    word_tokens = word_tokenize(definitive)
 
    filtered_text = [w for w in word_tokens if (not w in stop_words) and (w!='np')]
    cleaned = ' '.join(word for word in filtered_text if len(word)>1)
    
    with open(path, 'w') as to_clean:
        to_clean.write(cleaned) 
        
    to_clean.close()
    
    return

In [15]:
# Routine for building an interpreter of the stemmed words

def rebuild(parole_intere, parole_stemmizzate):
    vocab = dict()
    freq = dict()
    
    for i in range(0, len(parole_intere)):
        
        if parole_stemmizzate[i] not in vocab:
            vocab[parole_stemmizzate[i]] = [parole_intere[i]]
            freq[parole_intere[i]] = 1
        elif parole_intere[i] not in vocab[parole_stemmizzate[i]]:
            vocab[parole_stemmizzate[i]] =  vocab[parole_stemmizzate[i]] + [parole_intere[i]]
            freq[parole_intere[i]] = 1
        else:
            freq[parole_intere[i]] = freq[parole_intere[i]] + 1 
            
    
    return vocab, freq

In [18]:
stemmer = SnowballStemmer("italian") 
stemmed_words = []
whole_words = []

for i in range(0, len(collection_testi)):
    file_path = actual_journal[0] + actual_journal[1] + str(i)+ '.txt'
    

    cleaned_text = clean(file_path)

    full_cleaned_text = str("")

    with open(file_path, 'r') as f:
        for line in f.readlines():
            full_cleaned_text = full_cleaned_text + " " + str(line)

    tokens = nltk.word_tokenize(full_cleaned_text)
    whole_words = whole_words + tokens
    stemmed_text = ""

    for token in tokens:
        stemmed_text = stemmed_text + " " + stemmer.stem(token)
        stemmed_words.append(stemmer.stem(token))
    with open(file_path, 'w') as f:
        f.write(stemmed_text)


In [19]:
interpreter, counter = rebuild(whole_words, stemmed_words)
# print(interpreter)

In [20]:
import findspark
findspark.init("/usr/local/spark") # **
import pyspark
from pyspark import SparkContext
sc = SparkContext()
from pyspark.sql.session import SparkSession
spark = SparkSession(sc)

In [21]:
from pyspark.sql import SQLContext, Row
from pyspark.ml.feature import CountVectorizer
from pyspark.mllib.clustering import LDA, LDAModel
from pyspark.mllib.linalg import Vector, Vectors
from pyspark.ml.feature import StopWordsRemover

lista_topic = []
lista_pesi = []

#------ STARTING LDA PROCEDURE ----------
start_LDA_time = time.time()

for news in range(0, len(collection_testi)): 
    
    file_path = actual_journal[0] + actual_journal[1] + str(news)+ '.txt' 

    data = sc.textFile(file_path).zipWithIndex().map( lambda cleanwords_idd: Row(idd = cleanwords_idd[1], cleanwords = cleanwords_idd[0].split(" ")))
    docDF = spark.createDataFrame(data)


    Vector = CountVectorizer(inputCol="cleanwords", outputCol="vectors")
    model = Vector.fit(docDF)
    result = model.transform(docDF)

    corpus = result.select("idd", "vectors").rdd.map(lambda x_y: [x_y[0],Vectors.fromML(x_y[1])]).cache()

    # Cluster the documents into k topics using LDA
    ldaModel = LDA.train(corpus, k = 2, maxIterations = 100, optimizer='online') # em
    topics = ldaModel.topicsMatrix()
    vocabArray = model.vocabulary

    weight = ldaModel.describeTopics(maxTermsPerTopic = 12) # Pay attention to the number of maxTermsPerTopic 
                                                            # it must be consistent

    wordNumbers = 12  # number of words per topic
    topicIndices = sc.parallelize(ldaModel.describeTopics(maxTermsPerTopic = wordNumbers))
    
    def topic_render(topic):  # specify vector id of words to actual words
        terms = topic[0]
        result = []
        for i in range(wordNumbers):
            term = vocabArray[terms[i]]
            result.append(term)
        return result
    
    
    topics_final = []
    topics_final = topicIndices.map(lambda topic: topic_render(topic)).collect()
    
    i = 0 
    print('---------- NEWS n° %s' %news)    
    for topic in range(len(topics_final)):
        print ("Topic" + str(topic) + ":")
        j = 0
        terms = ''
        for term in topics_final[topic]:
            print (term + '  ' + str(weight[i][1][j]))
            terms = terms + ' ' + term
            lista_pesi.append(weight[i][1][j])
            j += 1
        i += 1
        print ('\n')
        lista_topic.append(terms) 
        
print("--- The LDA procedure took  %s seconds  ---" % (time.time() - start_LDA_time))

---------- NEWS n° 0
Topic0:
pot  0.006328716055309242
fam  0.006208742242230946
nient  0.006129731723750403
vol  0.006072223383553896
dop  0.006045901254260223
dov  0.005997727499597295
mort  0.005970311461602011
altri  0.005848417754195531
omicid  0.005834498687304169
capac  0.005831271075780091
colpevol  0.005798292507635383
mal  0.0057975575232233865


Topic1:
riin  0.00668553930986834
cos  0.006588499977436611
bagarell  0.0063419405585640656
maf  0.006044750042755046
dir  0.006021733793793081
ignor  0.005836690221697481
cap  0.00580026492044782
capitol  0.0057961138009314074
lett  0.005787426573563398
ginocc  0.005741121847403726
anni  0.005707630392235834
pens  0.005680250604422833


---------- NEWS n° 1
Topic0:
imam  0.006630665485775352
nacional  0.006604282130613948
stat  0.0059940546654387965
barcellon  0.00592229588540704
es  0.005863673243011137
rambl  0.005858876612972064
cni  0.005772762011057636
abouyaakoub  0.005680620889591882
terror  0.005667393970588512
agent  0.0056

---------- NEWS n° 12
Topic0:
sottomarin  0.007565199915765102
problem  0.006284026205455222
san  0.0060816139180759335
nè  0.006002176930970517
argentin  0.005926520966318171
nav  0.005869339688421538
poss  0.005697055706170847
raccont  0.005658999144291947
convenzional  0.005592847669910123
oltre  0.005587703408136526
ancor  0.005577542695218741
altri  0.005552393799506232


Topic1:
frequenz  0.006724315160463365
compliment  0.005947557772947331
problem  0.005856167724797652
soltant  0.005825283391935666
mezz  0.0057843691262347684
portavoc  0.005697383909752708
punt  0.005690648241026487
antenn  0.005651568223285269
aires  0.0056408856783567935
domand  0.005605036493676374
bas  0.005593693042104498
dett  0.0055905695838510995


---------- NEWS n° 13
Topic0:
arma  0.010553493100414835
cittadin  0.010351669313585655
special  0.010339042576646624
paes  0.0102615067414504
possed  0.009870818931842582
adott  0.0098168290966946
cos  0.009799278294361476
scend  0.009723640177161624
animal 

---------- NEWS n° 24
Topic0:
vett  0.01895109152426414
manten  0.01815327665733544
dev  0.017877547763224512
pallon  0.017542676752732907
scudett  0.01729191176321833
entramb  0.017230801799296588
vint  0.017152848075077075
europ  0.016937247735031042
vinc  0.016776517035964722
sin  0.016772504155768458
centrocamp  0.0167320434530572
spicc  0.01658240907141098


Topic1:
derby  0.019902066006651495
protagon  0.01904257825783331
laz  0.018285269331756546
vett  0.01767049317577544
duemil  0.017515034455279268
nov  0.01722935345422143
rom  0.017211269131647184
prov  0.01712584690871852
entramb  0.016672424201895412
elimin  0.016621368248991637
dev  0.01637617261225812
ultim  0.016238610422024624


---------- NEWS n° 25
Topic0:
vittim  0.012754658839547945
ragazz  0.012694549030356229
prim  0.012487206599897013
tend  0.012094157081447598
abbi  0.012032184249470489
scardin  0.011966842495382416
facil  0.011800360981963187
respint  0.01164449688139707
esser  0.01150870046816316
incub  0.0114

---------- NEWS n° 36
Topic0:
melegatt  0.008528291242186686
arriv  0.008186287173712026
produzion  0.008015584219656527
fond  0.007630940505358731
chairman  0.007529382060012362
stat  0.007387520719688927
pandor  0.0072010645261371825
commissar  0.007130539247745587
futur  0.007104669111109449
ricominc  0.007087836179345688
graz  0.0070843016589124345
giorn  0.007083390730762879


Topic1:
cris  0.007672246625986419
lavor  0.0075716549540670655
stip  0.007450374002580059
stor  0.007358154607936858
aggiunt  0.007339885756261175
quagin  0.007323003324235619
tribunal  0.007278344429012384
sosten  0.007264108700578152
provenient  0.0072005331264643115
prev  0.007166162200683294
qual  0.007163078628505686
stess  0.007158164837858123


---------- NEWS n° 37
Topic0:
support  0.0066668757731303385
affinc  0.006325770994575237
consent  0.0063163873538619126
ordin  0.006227622873318276
sempr  0.006215979709424726
massim  0.006180807577897548
mentr  0.006172810028318638
merc  0.006167929472399366

---------- NEWS n° 48
Topic0:
far  0.017086060289739854
compagn  0.013742750244298331
vogl  0.013227049168272938
alital  0.01169840748441211
lufthans  0.01136219263790879
ministr  0.01125418953325178
delr  0.010963620804556715
vol  0.01080507069056015
situazion  0.010713734190660115
commissar  0.010710576484924784
afric  0.010639111531068998
tedesc  0.010581005802486033


Topic1:
vol  0.010446743311723353
  0.00978901884845379
spieg  0.009703315498631576
trasport  0.009662487909294205
tropp  0.009627648913361823
met  0.009612103290000482
cit  0.009493094880430222
alital  0.009467527542878329
compagn  0.009374439785166524
infatt  0.009289379448268862
tedesc  0.009226464614247072
success  0.009196310923712704


---------- NEWS n° 49
Topic0:
dop  0.008922098571590343
anni  0.008384356727944381
mort  0.008289088320534424
oral  0.008153586727026157
sent  0.008116376956819249
lung  0.008009505365445577
ann  0.008008217272408429
prim  0.008005827043122811
formal  0.007974036942864306
sylveste

---------- NEWS n° 60
Topic0:
vinc  0.01008077210520229
dic  0.009620782781591036
rom  0.00959856970659309
quand  0.0095345959137615
ital  0.009256454531322966
quel  0.008841800653738425
ora  0.008753849648363995
gioc  0.008350810151457012
sol  0.008112284604577285
due  0.00805736448968001
dev  0.008031330954580319
falca  0.008003507703303705


Topic1:
robert  0.008109213299936177
imball  0.007962531652013556
strascic  0.007579222978289797
gliel  0.007455152221078551
già  0.007411197207369119
irreal  0.007385097145145096
dov  0.007364014299761132
vint  0.00725989814568798
davver  0.007257910465056125
falca  0.0071935371819842825
conosc  0.007179068616777213
anni  0.007154352304781679


---------- NEWS n° 61
Topic0:
accus  0.018555542940535144
ser  0.014635238987850817
stat  0.014203816130724927
tambor  0.012937677264131298
protagon  0.012405697589685159
ex  0.012375192849133333
ora  0.012131594655768409
sessual  0.012119339677958628
predator  0.011950330314835873
assistent  0.011940614

---------- NEWS n° 72
Topic0:
cas  0.004317458633967161
professional  0.004200136040816948
reinser  0.004129864603942573
team  0.004102042532738278
aiut  0.0040993298095538325
investig  0.004096277033445169
riusc  0.00406922796058115
mar  0.0040575939433221565
neurodegener  0.004016020413811518
casal  0.004005409168522327
altre  0.00397717449167072
integr  0.003968960543359263


Topic1:
anni  0.02155652630545908
mer  0.020117723570005155
italian  0.020054761127793454
ordin  0.019538695423327816
repubbl  0.0193756544036313
caval  0.011104022478446848
impegn  0.007351597625638625
ufficial  0.007348837429466442
vittim  0.006047675415440695
attiv  0.005654290639512597
oper  0.005629400185485631
testimon  0.005519329864036458


---------- NEWS n° 73
Topic0:
antibiot  0.031051783365837606
mil  0.021925098669387272
second  0.02046736288381772
anno  0.019960558278642876
person  0.019773277854255453
utilizz  0.01961421030331218
european  0.019377444888471086
europe  0.01937719255425763
mond  0.

---------- NEWS n° 84
Topic0:
good  0.012110511132552142
the  0.010446949865611966
fight  0.009902353877271316
ser  0.00972602778616475
original  0.008985764416291488
alic  0.008956438236592177
sol  0.008891564047706002
rest  0.008859393144720982
wif  0.008792730556982398
amer  0.00879221331224038
vittim  0.008778234924840405
godibil  0.008712181766388659


Topic1:
the  0.009027154673088757
ser  0.008947604596678826
good  0.008882143317150168
juliann  0.008685240742994834
wif  0.008627650885687326
margulies  0.008575738716882813
realizz  0.008435818267546032
fin  0.008397771215603237
asinell  0.008287841040168425
dev  0.008252081684171122
vit  0.008242566791997998
particol  0.008238782364306625


---------- NEWS n° 85
Topic0:
cap  0.026995247536289485
ten  0.026867460266386034
cad  0.026001352636937605
dispens  0.02596268987500546
legger  0.025796723574351726
pied  0.02545742000028655
ogni  0.025080897989194485
qualcos  0.024914037564790773
sgranocc  0.024904112699209814
frutt  0.02484

---------- NEWS n° 96
Topic0:
rally  0.010514025738387785
sempr  0.01013196137619281
giappones  0.01005575166953194
sti  0.010055321535440635
sal  0.009931671484377288
vittor  0.009891780711298596
trazion  0.009759339981078897
novant  0.009750307886596464
chiud  0.00974404789837916
integral  0.009621628721679456
oro  0.009512150631897387
color  0.009482572165135131


Topic1:
differenzial  0.010436990753054303
wrx  0.01038858254243832
cas  0.010379050137910732
rally  0.010195168998652186
blu  0.010159083471574001
year  0.010017513346424917
poc  0.009817921635224278
superc  0.009795238633925297
addi  0.009782308846079267
sti  0.009691963761559043
anni  0.009677926162360732
mondial  0.009577096642132858


---------- NEWS n° 97
Topic0:
eur  0.013917254538114633
cas  0.012813938238174512
mil  0.012806152521192389
cost  0.012646578726112582
acquist  0.012568765575563533
struttur  0.012116536374702077
comfort  0.011694320653815756
modul  0.011456896551975758
pieghevol  0.011284935918074758
ap

---------- NEWS n° 108
Topic0:
softwarin  0.005735695405701039
perfezion  0.005552605412279696
venn  0.005536650284720276
rappresent  0.005505623172311267
scriv  0.005504285676811706
poc  0.0054683577808053935
aggiunt  0.005365317975612684
franc  0.005301428012079464
indifferent  0.0052808292746306315
collezion  0.005240969176326645
spaz  0.005222028584315922
anim  0.005211689671434264


Topic1:
gioc  0.006905777887723707
commodor  0.006609250931687685
libr  0.0064800971548306125
videogioc  0.006319044038349153
fin  0.0061964383782633385
pur  0.00590050914910773
sol  0.005851212731027923
dic  0.00554680095466207
cas  0.00546895077373017
prim  0.005413233689732295
cert  0.005369536172829861
pc  0.005348790668176752


---------- NEWS n° 109
Topic0:
conferm  0.009285271843213594
stipend  0.00875283706703407
vers  0.00837711396260781
inser  0.00828522224569938
quest  0.00828513152975674
stabil  0.008283788126303511
soprattutt  0.00824047726300232
già  0.00815319197183758
port  0.0081351773

---------- NEWS n° 120
Topic0:
no  0.0063445518511592
cecil  0.005921293029653266
clic  0.005887300489592789
ricord  0.00575160596546324
pront  0.005723668077840538
argentin  0.005698753710993664
show  0.005693580456904535
quand  0.005550870593476835
nott  0.00550331104663858
ignaz  0.005469197657376924
vist  0.005465579229123986
dirett  0.005429019070635896


Topic1:
clic  0.01134191215620825
bell  0.007828901329474777
fin  0.007302885516490086
stor  0.0070394090929634555
quand  0.006601471969530536
rodriguez  0.006347690453328821
fors  0.006344372653471518
conclud  0.006134515598264457
ora  0.006010157960144158
moment  0.005987349272799937
sorell  0.00588569519872038
no  0.0057657190746035515


---------- NEWS n° 121
Topic0:
violenz  0.006924021204988309
soprattutt  0.006552824208413163
avven  0.006418739254480913
element  0.0063292820887446725
sband  0.00616586978822415
chiav  0.006117119238085182
uscit  0.00608095532925568
sangu  0.006053690387764557
distanz  0.005993937859821241
o

---------- NEWS n° 132
Topic0:
bracc  0.0060606832605148535
conosc  0.005948953965169524
nott  0.005870089353574419
testimon  0.0058552656679327
sospett  0.005753734054978059
bord  0.005748957144597587
potrebb  0.005741052096679849
tutt  0.005718069487654791
rintracc  0.0056704969534089825
messinscen  0.005664369097791118
ospedal  0.005660483220605907
pian  0.005660202757378589


Topic1:
propr  0.006641654248641943
ferm  0.00659405494793365
ex  0.0062148627341854275
ragazz  0.006179858367211157
bar  0.006157903564909182
palazz  0.006131440823146895
poi  0.006049368296924297
violenz  0.006032657994617899
stat  0.00600327574561167
uffic  0.0059972352401825935
port  0.005927562945107714
violent  0.005857850531978768


---------- NEWS n° 133
Topic0:
stat  0.00841637962636721
attiv  0.008148306878523578
tecnic  0.008136124290628025
credibil  0.008117732523788208
prefett  0.00804984879777107
riunion  0.007868118209473168
pot  0.007841140915701638
far  0.007818975429406329
protezion  0.007718

---------- NEWS n° 144
Topic0:
concep  0.011106393954482747
potrebber  0.010875042658388213
decis  0.010858899953382128
calor  0.010406973453335754
riscald  0.010402961737120063
solar  0.010381615321999353
resistent  0.01030958250242899
pass  0.010290936492572024
centralizz  0.010199394943134148
consegn  0.01017691073580092
luog  0.010172763972506122
essenzial  0.010005436796871065


Topic1:
mil  0.012271423470021199
cost  0.011248571844515729
terremot  0.011197920152060227
decis  0.010954708797994447
eur  0.010762940242007647
acquist  0.010641616697175942
cas  0.010481837850512437
modul  0.010258776176948783
abit  0.010231028068446248
spost  0.01019520096862556
scal  0.010106543911633384
uso  0.009999935343958759


---------- NEWS n° 145
Topic0:
insult  0.0034947818591242715
merit  0.0033960409576598116
legg  0.0033852358100129455
umil  0.0033737505048479486
genitor  0.003328098846713383
somm  0.0032769930236633874
sospett  0.0032764661734945993
cucin  0.003223782506889463
studios  0.

---------- NEWS n° 156
Topic0:
acces  0.007424826891181019
caracat  0.007411828813040396
veterinar  0.007308552084489204
can  0.007084538747728879
modal  0.007079162167495032
mod  0.0070523375348498975
buon  0.007048423748528623
esser  0.006957496546607997
port  0.0069474641274257145
aiut  0.006907461218509428
web  0.0068922286595083215
giorn  0.006888616925819169


Topic1:
animal  0.00977641468112339
tratt  0.008961745760479149
cucciol  0.008338374994347105
donn  0.008303948336535751
caracal  0.007973076906535039
mil  0.007843526835989923
cos  0.007775043594051723
ibrid  0.0077443415713388765
giorn  0.0077346393118481105
mett  0.0074772045573738375
mod  0.00733746883868791
giardin  0.007314116208869358


---------- NEWS n° 157
Topic0:
ripos  0.008963377533458078
ultraortodoss  0.008491874330085462
part  0.008163864640480762
attratt  0.007915587040900458
necessar  0.00786990223746227
temp  0.007821123697038173
trascin  0.0077644648817118585
govern  0.007675327748114016
regol  0.0075339

---------- NEWS n° 168
Topic0:
affid  0.0038627767301771068
caprott  0.0038554702862217197
rimaser  0.0037222296782836485
frigorifer  0.003677912960198289
jones  0.00364800100455749
sfil  0.003587933252961758
oggi  0.003505322431080522
proib  0.003475704929779441
sed  0.003471701531283834
superstor  0.003462157382667645
avvezz  0.003446945019716247
svizzer  0.0034371442959439786


Topic1:
esselung  0.005180812393378146
ital  0.004753850780676231
mil  0.004510370541305937
marketing  0.0039793343545552985
divent  0.003911980987460969
supermerc  0.0038993371760272944
accompagn  0.003886049314334197
mod  0.003873489469071789
cant  0.0037970850899096153
anzi  0.0036399955988990172
cib  0.00363832596010097
anni  0.0035876627770984396


---------- NEWS n° 169
Topic0:
gatt  0.05303327147339875
  0.051559866014687
stran  0.04875007403085118
esemp  0.048136666558390655
vit  0.0474181209844302
audrey  0.04678537055106727
georg  0.04570008684251476
scelt  0.04477017507936853
mort  0.04413975416830

---------- NEWS n° 180
Topic0:
fall  0.010847767083641106
renz  0.008559531999770019
ora  0.00800145387942346
dic  0.007515657599733654
prim  0.007481429585447878
referendum  0.007395884502966221
ragazz  0.007285322000263936
tropp  0.007214746703317974
altri  0.007053065655740257
dop  0.00703359330308584
serviz  0.006884396245449688
quand  0.0068088805354848785


Topic1:
sconfitt  0.007499641295399888
div  0.007198294798106769
serviz  0.007146679675893196
dev  0.0070543261885165146
bonus  0.007013468178257936
anno  0.0069400999160389885
atto  0.00684002174720042
leopold  0.006815844113522772
nemic  0.006691719815619414
qui  0.006593049950011799
graz  0.006576274889921345
aver  0.006555756837364128


---------- NEWS n° 181
Topic0:
men  0.006025628753748548
shar  0.005876476308942245
not  0.005833745830750872
matte  0.005650779440194029
esemp  0.005585895754578389
scommett  0.0055241533940075605
lill  0.005517645719492957
ancor  0.005474588053950092
vedr  0.005452663556498444
va  0.00544

---------- NEWS n° 192
Topic0:
piazz  0.011266848771701799
vial  0.009468929090786128
km  0.008370745464895548
port  0.007758699285479048
cors  0.007743732495991472
vers  0.00760804897319711
maraton  0.0075455871473420364
arriv  0.007491070657265091
via  0.007403523033010596
part  0.007083428119457574
nuov  0.006874117937600367
ricalc  0.006833964919053025


Topic1:
nuov  0.00743712743953289
destr  0.00705936814602016
pass  0.007022788234225034
cors  0.006978926180316988
via  0.006897271540526081
amendol  0.006894332167661307
triatlet  0.006858659458224609
pisan  0.006833173258909743
fa  0.00683179481468736
ros  0.006824166234898977
strane  0.006813828523331123
ultim  0.006701293162617846


---------- NEWS n° 193
Topic0:
tv  0.0047769583852479295
prov  0.004725231018575647
affar  0.004611571412682491
pusher  0.004596240493797232
obblig  0.004590621261029111
vigil  0.004576358350057974
amic  0.004551968794614777
padov  0.004522214616215656
signorett  0.004506482094889252
sorvegl  0.0044

---------- NEWS n° 204
Topic0:
grand  0.005021605780045076
vinc  0.004917387869624136
part  0.004899929541529688
ancor  0.004891069077265744
mes  0.004887199899601226
due  0.004885941867647189
favor  0.004883335736710667
quadr  0.004849426898664245
sospension  0.004804847432019638
naumkin  0.004798427293914968
scheveningen  0.004788848485574803
sufficient  0.004755035350389217


Topic1:
part  0.0069394666219251624
stat  0.006459336308035191
anni  0.006406823400026823
mes  0.005992223530527568
alcun  0.0059361180380587105
scacchist  0.005798771286837096
maestr  0.00570569416262086
sembr  0.005365182218068971
grand  0.005333778917155884
italian  0.005278272696788527
eur  0.005195839623705709
fa  0.005172892135074263


---------- NEWS n° 205
Topic0:
donn  0.008405114665605232
consens  0.005879651920982597
cas  0.00554768535269802
anni  0.005280410258983222
poi  0.004753562405283945
weinstein  0.004533692708320995
stat  0.00450741633694334
fa  0.0044772992380012555
femmin  0.00416092088044

---------- NEWS n° 216
Topic0:
contestual  0.011362707507699785
cagliar  0.011137723029052003
unic  0.010945640756504161
raccogl  0.01086649498616712
neuromuscolar  0.010794975223557535
paol  0.010770279963319118
torin  0.010369267386625965
lavor  0.010314240096083298
anti  0.010297405973820888
spagn  0.010277395095775639
mil  0.010191693072069956
oltre  0.010158689830731505


Topic1:
sla  0.012624788195482519
centr  0.012245227167182519
paol  0.011762812857265104
mil  0.011469729618542998
malatt  0.011366014851001928
segu  0.011267849028606095
internazional  0.011253481781019424
eur  0.011175541593678059
nem  0.011101193840124552
incis  0.0109223745225029
ital  0.010827645494683
comit  0.010813761665442612


---------- NEWS n° 217
Topic0:
meredith  0.018670932348536125
tim  0.01586818860723313
accord  0.015214179511212273
miliard  0.013570776695951168
oper  0.01351685704120235
approv  0.013434472365504432
rispett  0.013232641618580846
dollar  0.012833662947824544
koch  0.0126379570290

---------- NEWS n° 228
Topic0:
settiman  0.007407926064215344
presid  0.006886827622417352
trov  0.006549679312282957
medic  0.006488935209280726
certific  0.006418717789355991
fantasm  0.005830115935873671
cas  0.005734570473827341
docent  0.0055865041933926855
scuol  0.0055683893837835015
supplent  0.0054964900197167574
cattedr  0.005150440456421867
senz  0.004934649459639418


Topic1:
sistem  0.005111586024069269
aul  0.00497848130661559
rispost  0.0049294913999274265
leg  0.004882004350119306
senz  0.004828269688437476
ultim  0.004657101920361029
dic  0.004613868257770885
settembr  0.004582118908042674
compens  0.0045554087664611475
ruol  0.004553779663363289
chiar  0.004509649222958436
altre  0.004506874647245954


---------- NEWS n° 229
Topic0:
stat  0.006137009956330298
clausol  0.006066952627661334
reiter  0.005916272703666916
rimast  0.005803613983782622
uom  0.005771737724823115
liber  0.005648874732652656
infatt  0.005634148058850724
indetermin  0.0056221654108529065
giorn  

---------- NEWS n° 240
Topic0:
arriv  0.01386082903818419
med  0.013681284864899872
listin  0.013645015591988774
fin  0.013505165998584956
societ  0.013444807872585698
  0.013299112133873454
spint  0.013156187350561283
global  0.013093955311203047
segu  0.013084880255405
mib  0.01306869933176862
no  0.013049515130405564
eni  0.012968803773519963


Topic1:
societ  0.014825632864986225
titol  0.014095076974860803
esser  0.014089720217871885
street  0.013669263359047938
fin  0.013637756956115802
paus  0.013563342308000706
gennai  0.013537274711648844
ital  0.013464171555063527
esist  0.01338304198520456
cap  0.013323681209357566
global  0.01326074077787885
arriv  0.013205059160117957


---------- NEWS n° 241
Topic0:
illumin  0.07128154400669773
colp  0.06513829364990512
pagell  0.06415258277621443
falqu  0.06384736989496524
montol  0.06110418359717323
belott  0.060867475975965896
rosson  0.0607291326995936
tap  0.060256692084699116
iag  0.059077245201327705
miracol  0.05902106151426001
se

---------- NEWS n° 252
Topic0:
  0.043432863036231686
piazz  0.04108720354676621
venditor  0.04068361879263826
eur  0.040164238738572056
sequestr  0.039770841070937944
stat  0.038969463871138046
cinquemil  0.03841902155077434
palloncin  0.03837794923889586
vigil  0.037884910221036974
dop  0.03771740508908066
san  0.037694122714367645
commin  0.03671056627181934


Topic1:
eur  0.04085120492979554
municipal  0.04058269436566997
torin  0.03996182132109072
carl  0.039940026414429376
vigil  0.039537293484840835
allontan  0.03936413167158692
passant  0.03771689663255583
sprovvist  0.03649476754518284
vergogn  0.03631317852532966
abus  0.036169273892570394
stat  0.03610265372834413
ambul  0.0360217601748022


---------- NEWS n° 253
Topic0:
labbr  0.03265731891980687
ari  0.029196004764262536
mot  0.028729145796098574
malign  0.028071233177184882
occorr  0.02786260331332728
poic  0.02747106446950125
sempr  0.02733382112080014
protegg  0.026960266155443686
estrem  0.02686347573555174
sol  0.026

---------- NEWS n° 264
Topic0:
edizion  0.007976335976841593
storic  0.007641672145658264
attor  0.0074908179484805635
tri  0.007485315379746617
ved  0.007376875141797141
angel  0.007342644279392898
sent  0.007336115239415378
potrebb  0.007281849236494624
show  0.007266597257542158
approd  0.007264770576454439
div  0.007263422700334497
lanc  0.007242245356031192


Topic1:
rai  0.008575065136537559
direttor  0.008546453113315589
carl  0.007811823895363157
luc  0.007730005446944281
prossim  0.007709473879223601
affianc  0.007695631151796915
ragazz  0.007501747015823788
pereg  0.0074182809149195925
rilanc  0.00733028283211671
canzon  0.007314133155544511
firm  0.007149769416643599
argenter  0.007099696229748209


---------- NEWS n° 265
Topic0:
pensier  0.005892966425128144
accett  0.005850361535781922
perc  0.005683155435553692
vid  0.00562497714149349
ora  0.0055959208538563865
scegl  0.005547849558470291
ancor  0.005537446142272231
ricord  0.00551265847228578
camer  0.005507896741373896

---------- NEWS n° 276
Topic0:
oltre  0.0102096794199561
lettur  0.010022323316597996
valor  0.009553875752731778
strument  0.00952083063049016
locazion  0.009509539379927032
affitt  0.009365583537923144
provved  0.009215755827650579
rappresent  0.009197893295188076
fin  0.009159927724992434
citt  0.009111605944877393
arriv  0.009057768751822686
decorr  0.009040582266766975


Topic1:
cred  0.013204229843369658
librer  0.012484498671946495
impost  0.011633873645922421
piccol  0.011458821564593793
dettagl  0.011425710565116256
approv  0.011021993920340534
eventual  0.010665354451315874
emend  0.010602542620046318
tar  0.010387360069475028
molt  0.010312017920764204
imu  0.010123401220247944
bilanc  0.010008741363123496


---------- NEWS n° 277
Topic0:
strag  0.016917773832492607
pres  0.015740001296259732
faun  0.015733204895045715
protett  0.015468076764539348
cur  0.015395886194667538
infatt  0.015250099040516921
veng  0.01519008748888059
mism  0.01432910021447682
libert  0.01431701815

---------- NEWS n° 288
Topic0:
churchill  0.006871212807694383
prim  0.005980100538868539
sempr  0.005138871905967674
vien  0.004585360866303956
ministr  0.004539958887772046
giorn  0.004409265508190858
ora  0.004295542534850052
ital  0.004281504839275617
dop  0.004194735329766027
discors  0.004162771548180166
popol  0.004144895465671472
quand  0.004127177098114981


Topic1:
scotc  0.0037530382971449946
davver  0.0036907744612690034
macron  0.0036849158932523735
vendett  0.0036680000617607673
privileg  0.0036270751735337087
chamberlain  0.00362405108816116
gennai  0.00360426709116637
tropp  0.0035783130231597004
consigl  0.003571179320571265
piagnucol  0.003566946029623457
propr  0.003527800105470715
sangu  0.0035242636849677433


---------- NEWS n° 289
Topic0:
merc  0.006953680289088043
va  0.005971290942770652
tracc  0.0059179945128202075
diongu  0.005885601305366317
dov  0.00586777301314802
accoglit  0.005799495510514934
mond  0.005719395808038416
furt  0.005711745383690688
franc  0

---------- NEWS n° 300
Topic0:
tren  0.010685979890881646
banchin  0.010273142328715103
stazion  0.00957003596799964
binar  0.008248868340853494
atm  0.00812402164737145
barcoll  0.007977074414975291
dipendent  0.007916281744557066
dop  0.007901872387154276
sicurezz  0.007686593972381928
monitor  0.007649826736440513
quand  0.007641289113471755
second  0.007563191708718533


Topic1:
pen  0.007709166285213092
per  0.00730200538547672
mov  0.007247126089314624
parametr  0.007104588274154741
ombra  0.006941037478507207
attenzion  0.006932080448444125
tren  0.0069275214475624
chiusur  0.006835392002265824
isabell  0.006781954319406337
barcoll  0.006754560097375258
poi  0.006747637520819079
san  0.0067054414426649675


---------- NEWS n° 301
Topic0:
inaugur  0.019057810679258182
uscent  0.017736226122135733
vegas  0.01681787637711391
de  0.01631824151561141
buon  0.01630128069663972
dispon  0.01629827027297675
rispost  0.016261033233831428
consob  0.016203389510057294
authority  0.015921385

---------- NEWS n° 312
Topic0:
ser  0.0057638166757590795
consegut  0.00561666841346863
  0.0056140905881862276
cellul  0.005599904775932303
stat  0.005567579243454522
raccont  0.0055373088732398575
stud  0.005520782204667648
quel  0.005489791858510342
effett  0.005482546469271835
mamm  0.005479692375775522
mov  0.005467802924325832
soc  0.005447594061646091


Topic1:
andre  0.007480386380357482
ros  0.007362652179709197
carabinier  0.006968592631050024
scompars  0.006621440184259597
nessun  0.006261469659791169
person  0.00617265666412194
timor  0.00600146045889323
societ  0.0057994290609690155
amic  0.005797027179583453
mattin  0.005688597356558088
mai  0.005670851237163712
ufficial  0.00566460874139341


---------- NEWS n° 313
Topic0:
strument  0.0045098190847185846
piazz  0.004286732783292863
fin  0.004240665558888957
commerc  0.004212408990063301
anell  0.0041756954080922936
nov  0.00414451024945816
client  0.004127309828744995
poi  0.004090648005056789
portafogl  0.00406904293141

---------- NEWS n° 324
Topic0:
fattor  0.010577571072603011
invec  0.009924455742762586
mil  0.009691541287175044
tocc  0.00934337972660506
registr  0.009323911010528199
sconfort  0.00931522320950464
cittadin  0.009283632426528207
immed  0.00907630479262585
cris  0.00903110710304179
principal  0.008999409704148796
primissim  0.008987131100768061
nasc  0.008985728714461858


Topic1:
figl  0.015066100103711856
cal  0.012671589616423465
donn  0.012487852283363235
diminu  0.011452690803312808
numer  0.011378089469712428
men  0.011063945782255932
mil  0.010711928369689974
italian  0.010119366413553554
nat  0.009997208322854006
anni  0.009972702763419751
nasc  0.009412557378717558
statist  0.009373146274771186


---------- NEWS n° 325
Topic0:
chiam  0.007009831010189503
neppur  0.006670044282770891
aforism  0.006513301174829017
televis  0.006430605933292795
giocator  0.006426885599108978
concentr  0.006404173813723275
bar  0.0063892769020900885
agnell  0.006387506005111638
tifos  0.006347477

---------- NEWS n° 336
Topic0:
sfond  0.007608490912605718
decis  0.006985587049607628
error  0.006966991882844681
twitter  0.006825050352467546
dibatt  0.006790838768984378
chiam  0.0067872888169583315
indirett  0.006762713840718024
destr  0.006762166370215054
critic  0.006684922018699077
ancor  0.006671353837223121
amman  0.006617144217602201
dop  0.006595904829858182


Topic1:
trump  0.01239220047242415
may  0.00949659017132503
sbagl  0.00881445551218151
stat  0.008699668413781343
president  0.007923767090462152
invit  0.007648788430293969
odi  0.007621710983221342
grupp  0.007538270101568268
theres  0.007452908934946372
twitter  0.007266266629055966
rudd  0.007171112286649316
first  0.007098589804452101


---------- NEWS n° 337
Topic0:
francesc  0.005795978099460413
cresc  0.005710646762662269
president  0.005409911111878007
lavor  0.005350483879467309
cerimon  0.005312132477916198
miglior  0.005105333346803866
polit  0.005080876660902862
ex  0.005070672123151299
paes  0.0050469551

---------- NEWS n° 348
Topic0:
special  0.002454308985464987
prim  0.00241754401298706
passagg  0.0023782861898075857
orecc  0.0023695692775309593
capac  0.002336702992901405
conclus  0.002325250340157744
cors  0.0023055642764787635
eredit  0.00229883608750824
creat  0.002295152014558608
accontent  0.002286652501635113
occasion  0.0022840129021814417
accompagn  0.0022685445542919907


Topic1:
isell  0.005231369990187901
port  0.0033703409428828147
carl  0.0033311618964908933
test  0.003244227729631109
anni  0.0030207333407135445
oper  0.0030070736203575026
edizion  0.002954858498314378
var  0.0029273164124400076
studios  0.0028395810884511274
appar  0.0028189261056782767
nuov  0.002776618921286419
metod  0.0027408048997086755


---------- NEWS n° 349
Topic0:
concret  0.005146348330365607
import  0.005116658468239505
istit  0.004989652619493263
consegut  0.004987921888715873
punt  0.004987867765255358
tant  0.004980977653072412
divent  0.004949404893612576
special  0.004861413018953835


---------- NEWS n° 360
Topic0:
stat  0.008529265049753503
ior  0.007572552160372949
aver  0.0073810972901163984
mattiett  0.006910850817670085
due  0.006564020883144877
aggiunt  0.006234552438044746
milon  0.006168449932568759
violazion  0.006128652945448146
sant  0.006125127473646283
general  0.00610611258983614
contest  0.005930300744297667
confront  0.005915382383923874


Topic1:
conferm  0.006588210697608057
respons  0.006143630439654698
realt  0.006017046709703372
trasfer  0.005862184103732321
rientr  0.005707272636827819
mand  0.005704649779361398
ufficial  0.005666665869618762
mattiett  0.005647131090002943
propr  0.005614763670547236
incar  0.005592423635925029
spieg  0.005548734343513674
possibil  0.005538524158798058


---------- NEWS n° 361
Topic0:
tumor  0.006479058726322567
googl  0.0061010546497587154
iniz  0.006031115907934403
istit  0.0057881309452867844
mod  0.0057579990198299525
affid  0.005694529867774658
esprim  0.005692980452656388
pres  0.005671710838334607
bianc 

---------- NEWS n° 372
Topic0:
scuol  0.006277956266286824
mai  0.006121913297097788
polit  0.005991397560358497
previst  0.005844432365273259
presid  0.005745052766078832
serviz  0.00572812363592488
consecut  0.0056764366703481155
class  0.005658518701641167
imped  0.005623733191797241
docent  0.005561423228148703
provincial  0.005551916889623259
alcun  0.005518816922473616


Topic1:
stat  0.006261403925955855
cattedr  0.005902177973647866
maestr  0.005840974206230775
norm  0.005688460525958773
lavor  0.005532507961022436
scuol  0.005528596140280538
polit  0.005510059665495845
spieg  0.005437040046526214
dic  0.005419540464936644
present  0.005383166333584303
vit  0.005359919808239849
rinnov  0.00530212170102059


---------- NEWS n° 373
Topic0:
anni  0.005658442535914639
oggi  0.005652461054164716
gener  0.005415901351575218
bitcoin  0.005302406214202299
strett  0.005282076312851834
men  0.005225080250158754
rest  0.005219338257439158
scend  0.0051791410826340974
cos  0.00514105737341

---------- NEWS n° 384
Topic0:
sem  0.006272704367531078
cos  0.0061291339331321915
dipart  0.006040567959980421
principal  0.005938121694274039
verdur  0.0059318642611778435
spunt  0.00591693371600573
soprattutt  0.005889388775994777
color  0.005845173159434788
pubblic  0.005822309284084095
vuol  0.005812861086980674
rapport  0.005803042017681355
compost  0.005793220645086572


Topic1:
ris  0.02087521991516265
ross  0.008318491900045693
polifenol  0.007688237104450727
stud  0.007547207171301568
ner  0.007407591452030672
integral  0.006886038215737396
mil  0.006750024821795383
propriet  0.006688011878942062
ricc  0.0064898479380861174
può  0.006475621669485803
stat  0.006392664325551446
verones  0.006347871341427148


---------- NEWS n° 385
Topic0:
aggrav  0.007176057956504479
fattor  0.006984050353778003
rai  0.006891577226849934
vicin  0.00687989983777568
alvez  0.006812684924569111
franc  0.006663043581165049
spad  0.00666122205903775
pm  0.0065752693067632866
incontr  0.00632548544

---------- NEWS n° 396
Topic0:
yar  0.026461482235546598
avvicin  0.021468968587091953
mond  0.0206448671045885
sembr  0.02050710623345161
sempr  0.020315703017783862
salv  0.020203235719355544
chied  0.020185340847087334
nott  0.019688457648063623
salvator  0.019479378771367756
uman  0.019296173344907228
ora  0.019046778666983406
imposs  0.018869978478236224


Topic1:
sempr  0.02137259472081751
riprend  0.02107821074787047
sbagl  0.020654508687697252
qualcos  0.020368937741928283
iren  0.019653514585019367
riesc  0.019404322222655486
ora  0.01935728136853373
andre  0.019268796075967
insiem  0.01925036236335545
cattur  0.019221180659512085
intenzion  0.018984736367371004
procr  0.018764021359559423


---------- NEWS n° 397
Topic0:
altissim  0.009316309801565281
ret  0.009251612897006382
maestr  0.00900526710995226
alunn  0.008998193899740301
sorrid  0.008986544283786795
sfott  0.008958822658146945
scandagl  0.008950992592248174
milanes  0.00881022232463376
qualcun  0.008809521539339298

---------- NEWS n° 408
Topic0:
disabil  0.009058274905941185
robot  0.006640979859669658
tecnolog  0.005917478382409084
asphi  0.0056004821032480355
progett  0.005527959457413664
person  0.005415120596686747
scuol  0.00483190942728568
lavor  0.004711530328059038
handimat  0.004693469636934306
bracc  0.0046169440615230805
access  0.004481429061049455
istit  0.004402998755185032


Topic1:
aggiung  0.0042156521411138015
superior  0.004199155753469904
dipart  0.0039033250644338047
mil  0.0038654219568795786
port  0.003855947513845158
selezion  0.0038527417374851617
fasc  0.003827449255930724
sping  0.00382499217837938
guid  0.003805683274688563
manifest  0.0038040205156601543
inform  0.003793677794243776
nomencl  0.003792120353844672


---------- NEWS n° 409
Topic0:
mueller  0.009992551168466658
elettoral  0.009636608687621884
flynn  0.009442737937473506
kislyak  0.009248284503612862
cos  0.008955724080768137
russiag  0.00893992046237813
aver  0.008888411933874128
spicc  0.0088227264871283

---------- NEWS n° 420
Topic0:
decis  0.02501388582084346
risvolt  0.024896289858757414
prov  0.024806847114177053
campion  0.02453462664802559
cap  0.024168998676535082
fra  0.02402760138449599
napol  0.024022715186998524
pronost  0.023810942862406738
nuov  0.023393744801414477
legg  0.02338225614632346
debutt  0.023099632654197313
due  0.02295717137271971


Topic1:
due  0.0291539031158316
potrebb  0.02542873089299875
ser  0.02460700391890372
gattus  0.02454977180068927
cap  0.023969366889040982
iniz  0.023950221086232605
oddo  0.023482418393122522
nuov  0.02338087183261756
juventus  0.023341724739401146
fra  0.023142269725391223
pronost  0.022980120883176983
facil  0.022864350723482908


---------- NEWS n° 421
Topic0:
barrier  0.014346395081731146
esser  0.013828772129978031
amo  0.013823618634723684
stat  0.013817035885030178
affid  0.013645064938247957
tutt  0.013591848389744523
denunc  0.013388112290565442
incaten  0.013336844059551552
sett  0.01333493736177461
abband  0.013295911

---------- NEWS n° 432
Topic0:
val  0.019550793548699888
schiaff  0.018780664596245136
stat  0.018444693529375857
maltratt  0.017635794083385063
riserb  0.017376141394994205
intern  0.01709224480869571
signif  0.01699554064619862
stabil  0.016871363028765976
massim  0.01684965995906917
install  0.016757410848123
avvolt  0.016731974153066327
compagn  0.016683917453353878


Topic1:
bui  0.018084974949436908
stess  0.017893886280931182
rest  0.01783151206545499
piuttost  0.017569144923192136
ore  0.01744696286231223
stabil  0.01742614138832106
carabinier  0.01729351635740285
assistenzial  0.017259964714495386
install  0.01725002462293565
psicolog  0.017054175421138528
costrett  0.01696877437631308
down  0.01688380300260082


---------- NEWS n° 433
Topic0:
moss  0.007259939947687198
notiz  0.007123575310601767
line  0.006975569825327571
ansa  0.006953673123992209
sed  0.006932471185974373
filtr  0.006846310471267606
arrest  0.006834167729286934
allontan  0.006758805662255544
cont  0.006750

---------- NEWS n° 444
Topic0:
mil  0.01668549547535698
stad  0.01627421214583213
cominc  0.015723924899488432
dat  0.015649984073962966
fan  0.01546035474772881
prevend  0.01528075229451475
band  0.015093251425385716
già  0.015067936562948157
part  0.014909283283651303
eugane  0.014736952243050019
tre  0.014727974123747602
ital  0.014681497710972746


Topic1:
rom  0.015742463838184236
acced  0.015076037935769688
registr  0.014657435693384527
ticketmaster  0.014589799325376481
avverr  0.014371623693807007
invec  0.014358526055817134
band  0.014350105018233179
stor  0.01427695481581943
event  0.014194638980400725
tre  0.014169040477287734
mercoled  0.014080689799394319
days  0.014048032223956832


---------- NEWS n° 445
Topic0:
femminil  0.007106553702746396
sempr  0.006790391489845903
gran  0.0067706852593824585
creat  0.006729624329586972
legg  0.006713275430156417
monarc  0.006703119714533654
yul  0.006585628956506916
tem  0.006550511658707496
innov  0.006534089902554963
casell  0.00

---------- NEWS n° 456
Topic0:
tim  0.019291879794114702
present  0.01905425587052104
riassum  0.018657515156429206
robottin  0.01854694007668215
second  0.01854471022737359
inclus  0.018061523994142126
echo  0.01795993738717141
vicin  0.017873412548524214
pix  0.01779962169867575
miglior  0.01774959173858797
può  0.017558611339352475
oscill  0.017511315560933074


Topic1:
fa  0.020601838021044518
  0.019001445608169333
tratt  0.018945896547976567
assistent  0.018840787628863792
miglior  0.01879431437144087
parl  0.01866081780367518
muov  0.018300886515983484
vocal  0.018094555599177515
mentr  0.017917067207042187
pot  0.017914961653066174
mod  0.017892040010755183
second  0.017887577588321754


---------- NEWS n° 457
Topic0:
dop  0.017337597708405997
clochard  0.0131436945331776
venez  0.012758350577756265
piazz  0.012159398149802197
mort  0.011689999412586865
stat  0.011630607711268384
princip  0.011003969050092325
rom  0.010892945894750131
social  0.01021014115387841
carabinier  0.0

---------- NEWS n° 468
Topic0:
person  0.006787491603508281
disabil  0.006715437888338525
onlus  0.006095722942954165
scatt  0.006087163986027959
assoc  0.005912860610376941
poi  0.005497576354904637
programm  0.00541335600099038
decin  0.005383366455547248
impegn  0.005314682980354079
chirurg  0.005300437213555723
oper  0.005274479611895856
progett  0.0051466304805376315


Topic1:
paralimp  0.005629946341179595
amministr  0.005377623855455824
sal  0.005372770865572389
altre  0.0052733257686876575
ragazzin  0.005150612483101762
onlus  0.005127662625700652
professional  0.005017651453625322
special  0.005005353089052125
intellett  0.00495895014342463
mond  0.004954505552437483
fattibil  0.004951091285773917
istit  0.004950613488749047


---------- NEWS n° 469
Topic0:
mercatin  0.020971299388499995
pacc  0.018447508471973142
poliz  0.01805468963681787
sospett  0.017387922812379896
esplos  0.017194641570500428
stat  0.017162202521475555
evacu  0.01708998475056863
potsdam  0.01625228125654

---------- NEWS n° 480
Topic0:
turist  0.008186902653635102
prenot  0.008092621727127382
airbnb  0.00806303970253714
acquisizion  0.007896284278885956
pricelin  0.0078084021300453225
exped  0.0077744976056106
skyscanner  0.007392320775861038
cam  0.007350309387120138
sistem  0.007334397505575977
prezz  0.007298197164545249
merc  0.0072533941151960366
vicin  0.007248079396678185


Topic1:
portal  0.007927264244028932
nerd  0.007640426833013053
oltre  0.007522292264320292
arriv  0.007502837806704332
personalizz  0.007477074827756505
binom  0.007333873824639878
prenot  0.007314732116165768
turism  0.0073021225246080144
der  0.007231843186476679
settor  0.00723019764464299
ret  0.007205350797565964
alt  0.007153689125571884


---------- NEWS n° 481
Topic0:
meghan  0.009231293200095232
harry  0.008218383355676822
invit  0.0072342152465570166
futur  0.007083710659447492
padr  0.006818813895770007
potrebb  0.0068182820536402825
fidanz  0.0067939678640533754
markl  0.006716056300887843
kat  0.

---------- NEWS n° 492
Topic0:
sempr  0.007559276742226532
prim  0.006905601585926962
cos  0.006181551733073357
tant  0.005965106231757945
molt  0.005827280682133941
mai  0.005628221822332348
me  0.0055257338479441295
fin  0.004964606305235956
esser  0.004889550438875006
sol  0.004886708716558938
rapport  0.004653376740966753
part  0.004602757177682542


Topic1:
mai  0.0038162063428113444
vorre  0.0037533183991352147
molt  0.0036692152564489935
mentr  0.003644525994445
sol  0.0036270062082418906
stat  0.003598735974595852
esser  0.0035792882300004405
vers  0.00357240396205691
rapport  0.003571020005680005
dop  0.0035525351834012923
apert  0.0035431230163485298
vacanz  0.003542559166058361


---------- NEWS n° 493
Topic0:
cos  0.005613894255209533
nessun  0.0054289652548813
reinvent  0.005395935544466701
stor  0.005394569021741311
oggi  0.005385015434908169
nen  0.005191035168029524
sbagl  0.005180186754322155
riun  0.00511390786329963
vogl  0.005058287789277009
otto  0.0050358266619162

---------- NEWS n° 504
Topic0:
pass  0.007452245423113979
dev  0.007153627313571802
toyot  0.007151895238631687
basil  0.007081016774112371
colp  0.007076102696527853
ordin  0.007020302321564016
satir  0.006982751034913376
loruss  0.006936881891558078
torn  0.006760805179284578
vide  0.006617197938135862
pistol  0.006589853079593149
furgon  0.006486655549380126


Topic1:
scop  0.0074897905112643525
invi  0.007104601767866982
fra  0.0070537359327493224
satir  0.006902495921044984
proib  0.00689182323207206
strad  0.006881622242769027
attravers  0.006788846223213642
carabinier  0.00678304036936911
identific  0.0067426455760282835
notiz  0.006727148763582862
imped  0.006714142839948536
success  0.00667765535961628


---------- NEWS n° 505
Topic0:
strad  0.00420361183243443
bambin  0.00415822816642059
poliz  0.004056008831969002
bancarell  0.004047995283355893
far  0.00400073359541511
noir  0.0039847111150334924
rap  0.003922794840037676
second  0.003907048323509951
fa  0.00390335825917751

---------- NEWS n° 516
Topic0:
brav  0.00496688764026152
spall  0.004888670332610367
alta  0.004868020423231573
pover  0.0048633180917767695
prosper  0.004770129673198079
giorn  0.004721008353254133
otto  0.004717699593533908
sgomber  0.0047169414109040405
nessun  0.004703458489935505
altri  0.004684456548328535
estrem  0.004672856220828669
tocc  0.0046702021217282915


Topic1:
lavor  0.005636830682194809
giorn  0.005598192981719066
campagn  0.005350453330960701
sfratt  0.005160133614154342
puliz  0.005125412255814891
ordin  0.005025226082031709
anni  0.0049597909202765385
ser  0.00495061072933458
fasc  0.004933923258242605
capital  0.004889242823236096
pres  0.0048362875212390275
pubblic  0.004822002830988557


---------- NEWS n° 517
Topic0:
rispond  0.005277553139848641
attiv  0.004867178788698077
suppl  0.004778546007242572
disabil  0.004775929137898757
union  0.004722702312978439
grav  0.004635207599430244
stud  0.004627882533293175
assicur  0.004627543331911261
tratt  0.0046185869

---------- NEWS n° 528
Topic0:
angel  0.03267340233258773
secret  0.029236124337534336
adrian  0.028694373362858374
victor  0.027220166378142726
bast  0.027163755543974717
bellezz  0.027084772258056538
sit  0.026298525555615532
version  0.02574673948688531
ecco  0.025573721543103892
up  0.025534922767136238
mak  0.025177811805479396
veteran  0.02504124282024217


Topic1:
model  0.0284731941976657
scatt  0.027322582555098136
acqua  0.02725664765081315
mak  0.02723562582588473
sapon  0.027089495987930957
anni  0.026413492862303514
up  0.026251912975166488
bellezz  0.02601626495622197
secret  0.02591348654899369
divert  0.025626663045173574
tutt  0.02542501878513788
adrian  0.025211108478503882


---------- NEWS n° 529
Topic0:
capac  0.008907528926444308
ricors  0.008514323383219384
pugl  0.008464153024208268
region  0.008324768541987252
stat  0.008260552816004593
tap  0.007859181358440749
sed  0.00781155436872492
part  0.0077151234702708986
rimett  0.007341473058125044
litig  0.007328324

---------- NEWS n° 540
Topic0:
scott  0.0031403929689999585
sottolin  0.0030729348875651297
stor  0.0029741823148584524
ferm  0.002873454911641859
arriv  0.0028726695184009615
favor  0.0028603213480699165
campegg  0.0028313927826356987
mentr  0.002823615549024303
magar  0.0028202361501812163
riafferm  0.002819850651903543
orgogl  0.002817662586516683
chied  0.0028109729165533438


Topic1:
molt  0.004157365144144125
speranz  0.0040020131788253786
grass  0.0037871508796211155
polit  0.0037300998055136104
assemble  0.0037252940027658983
possibil  0.0036477465303875072
sinistr  0.0036450356285423215
lavor  0.003623919077278068
fil  0.0036127695961013537
qui  0.0036066553988690305
altri  0.003472626530967956
nuov  0.00342404829704487


---------- NEWS n° 541
Topic0:
ariann  0.006044677539297769
ragazz  0.0051325235233576245
torin  0.005104989359989968
tant  0.005053498217433515
ner  0.004832068041096766
anni  0.004773014664682289
music  0.004727288575152136
christian  0.004705610007551685
g

---------- NEWS n° 552
Topic0:
alex  0.0037221518175652455
opac  0.0036565279745737252
ricord  0.0036493269636337254
pens  0.0035907201324088577
amer  0.0035861448323794326
tocc  0.003584924597625292
sorrent  0.0034783045436163813
ottobr  0.003472939393687983
rimpiant  0.003386047586475072
giovann  0.00337322824984777
pass  0.0033612948452475893
sangu  0.0033589132503018637


Topic1:
volt  0.0044312448920808
pass  0.004353139725615463
bastard  0.004250771690673064
dic  0.004191997948004644
charlott  0.0041802740487126445
present  0.004012804071955149
fa  0.0038661266436930872
sogn  0.003794813194123255
napol  0.0036638950483016044
giovann  0.0035995332655583906
cap  0.0035912129676437953
ottobr  0.0035863551695140584


---------- NEWS n° 553
Topic0:
guid  0.007969265237589502
de  0.006157994429344418
mang  0.006089542630948109
cresc  0.0059811712408486956
cont  0.005950402023644103
barill  0.0058633553128664235
quot  0.005287818567582933
stor  0.005182711120559089
megl  0.0051553820985

---------- NEWS n° 564
Topic0:
  0.023693006222964465
inform  0.022878582175634505
figl  0.02207393951413656
dop  0.021967199764443928
marchigian  0.021829117858838412
ragazz  0.02159450025038315
peraltr  0.021572459502439186
dic  0.021530086861581785
frattemp  0.021049809197581003
tracc  0.020759128409871926
anni  0.02072113271080249
bosc  0.020694606003527818


Topic1:
staser  0.023986689932653606
rom  0.02350815780240338
ricostru  0.022450210014669368
nott  0.02199063307366293
notiz  0.02116831435862282
bosc  0.020992024043446075
ragazz  0.020944178901058704
tv  0.020806930360028075
tragic  0.020683527077524706
padr  0.020681030152935022
dicembr  0.020507424472995816
anni  0.02036938053911398


---------- NEWS n° 565
Topic0:
cib  0.015746127860746018
consum  0.015291515278241982
co  0.014797455977142751
op  0.01467185335168694
prefer  0.013990020406488246
prodott  0.013484248096369243
dat  0.012667476634050545
sprec  0.012514874161862245
angli  0.012141831654332138
east  0.011849019

---------- NEWS n° 576
Topic0:
molest  0.012711758281316472
nuov  0.010663646869909333
comport  0.010377510568928269
sanzion  0.009956566963414508
fin  0.009935599626020955
uffic  0.009646455355446035
contratt  0.009486934041708956
licenz  0.008868437329544555
statal  0.008103930624960155
pubblic  0.008022602709842896
denunc  0.008016873712949691
dipendent  0.008014906682631665


Topic1:
sub  0.007498333386506931
anno  0.007375961735440521
mariann  0.007152093890810453
entro  0.007144678805992364
gravit  0.00712410240633094
oner  0.007077174702741339
applic  0.007076368445226921
chiaramont  0.007069458001862557
residt  0.007062212084063081
dipendent  0.007061452212658226
molest  0.007038059493583186
disciplinar  0.007004866464454832


---------- NEWS n° 577
Topic0:
president  0.004987820844051273
salernitan  0.004844084276746947
mister  0.004758418136870493
laz  0.004647326929704104
robert  0.004627288310830997
ered  0.004575705482023058
aver  0.0045713895908192255
donn  0.004564499652

---------- NEWS n° 588
Topic0:
sab  0.009263945844046615
ospedal  0.008916169617894711
sangu  0.008656497582024868
accert  0.00856540054484973
piccol  0.008447894359042519
condizion  0.008392984760615842
iniz  0.008353771849689051
ricover  0.008066354944575797
ancor  0.007902048681539108
caus  0.007728509247323821
miglior  0.007684538258981849
mang  0.007568006781652691


Topic1:
padr  0.008401427676984331
ospedal  0.008104911614123852
uffic  0.008063104025265532
occhi  0.007939829048157227
tracc  0.007912271051229264
quand  0.00784887429389703
mes  0.007745910355466215
origin  0.007743292127323565
condizion  0.0077353387314427905
macchin  0.007658541558305918
cir  0.007620021874265261
consum  0.007618800929463006


---------- NEWS n° 589
Topic0:
negoz  0.01622593688327773
vend  0.015768292477915977
manichin  0.01542948756135057
obam  0.015115075142926255
ex  0.0138100040019431
brooklyn  0.013631948111439633
asi  0.013501024614970008
maschil  0.01328977604787803
dunqu  0.01299794450427

---------- NEWS n° 600
Topic0:
malt  0.016931745419231346
arrest  0.016666276943911672
person  0.01645077463823036
diec  0.01644855177593112
aut  0.015336085559232597
muscat  0.014965420221273083
forz  0.014835164625893845
stat  0.014728894775632988
investig  0.014654729936552707
cas  0.01460214545183729
maltes  0.014587257416447258
omicid  0.014551347114186992


Topic1:
già  0.016208468524704835
poliz  0.01575915045250215
stamp  0.015111117149797038
forz  0.015023710003832213
bugibb  0.01500929309360925
qualc  0.014842151095583157
riteng  0.014678235743587464
ari  0.014569162729418047
malt  0.014564452746774871
var  0.014448152033512704
muscat  0.014423638172325755
esegu  0.01435390555194054


---------- NEWS n° 601
Topic0:
scritt  0.008351826355639943
medagl  0.007993128501290291
resident  0.00783600857664856
avant  0.007695536389250984
ortic  0.007661361733373941
dett  0.007648443422406767
resistt  0.007640753876038476
grupp  0.007599564795599723
lavor  0.0075641269120409185
duc  0.

---------- NEWS n° 612
Topic0:
realizz  0.019252308278049114
fotograf  0.018950180480566438
pezz  0.018797766026738612
piccol  0.018640721802247375
gioc  0.018179794879043202
jones  0.018005661201775068
rohingy  0.017688320018945887
maggior  0.017688104745254188
baz  0.017459141591998927
migliai  0.017458778890791733
profug  0.017200411957719323
ogni  0.016863261682377412


Topic1:
arriv  0.019277735104883134
ricord  0.019068302086537686
giorn  0.018419843571119778
donn  0.01784666033762026
settiman  0.017584947729707694
continu  0.017331940780771778
camp  0.017273514682959002
trascors  0.01722105233635597
scors  0.01704835120910428
franc  0.017039028559343035
recent  0.01702865596479805
batter  0.016883684734883515


---------- NEWS n° 613
Topic0:
fior  0.012480504458907636
fors  0.011657607731707162
principal  0.011460911870588885
parlamentar  0.01133874668963219
mil  0.011336624400189376
previst  0.011320847705373904
cittadin  0.011168203224935013
classic  0.011161502950341321
cos  

---------- NEWS n° 624
Topic0:
inter  0.007936875976676187
cos  0.007503413915766235
poi  0.007133417555190214
gioc  0.0070499420944142585
arriv  0.006128030656663854
lavor  0.005381092819377982
ora  0.005343548588599289
fa  0.005314042961836984
giocator  0.005298973627162602
qualcos  0.005216977356489089
aiut  0.0052038599448533274
fatt  0.005196869754011072


Topic1:
part  0.00495451539748186
livell  0.004776831651532342
gioc  0.004623364296392958
mister  0.0045233774626733175
inter  0.004485250798299632
gol  0.004435087708503268
davant  0.004434207868610482
quand  0.0044235501793298765
champions  0.004394980384583164
anno  0.004346645193480706
venner  0.004336712332820285
permett  0.004323966061612978


---------- NEWS n° 625
Topic0:
stat  0.0188979615501426
incident  0.015696091785357035
ex  0.013725123364602508
prim  0.013486296799902986
trasport  0.01330154553791936
ospedal  0.01325342567844009
nessun  0.013194739902856217
ricostru  0.013185834034039843
pubblic  0.013153478827051

---------- NEWS n° 636
Topic0:
prevalent  0.007032497115655588
quind  0.006649335052240959
insiem  0.006645296177326789
coinvolt  0.006587280646090295
poc  0.006553814186411665
cranic  0.006345568529722713
compagn  0.0063154591221367285
cassin  0.006218707111545249
sinistr  0.006209962957075637
lateral  0.006112643285828203
boschett  0.00607034779488865
mogl  0.006051006199918493


Topic1:
seren  0.009246721883757525
indag  0.008236427521528636
ipotes  0.008096486365934496
mollicon  0.007412997556852474
stat  0.00722103211386034
arce  0.0069569963069750365
cap  0.006943931291995574
port  0.00686232201954381
lung  0.006840460889490269
cassin  0.006733060658297232
omicid  0.0066455294526545665
procur  0.006504070684697145


---------- NEWS n° 637
Topic0:
felic  0.034327906854836915
parol  0.03165518455403979
hygge  0.02986097276679981
coccol  0.028253748058678595
trend  0.027863432196763075
ben  0.027570767716591518
  0.027397255450503213
signif  0.02722027084352478
cos  0.02669807817350

---------- NEWS n° 648
Topic0:
stat  0.008576357577226605
ordign  0.006980323202005594
episod  0.006827988956441253
cas  0.006231541392184182
carabinier  0.006201883916460839
caserm  0.0056617034518594575
anarc  0.0054891851913324834
esplosion  0.005316648526056777
esser  0.005114437220467851
molt  0.00498347731893719
indagin  0.004918235127736401
port  0.004869850313823181


Topic1:
vicin  0.004565389040571132
front  0.004561507215664328
dar  0.0045303325704141965
marciapied  0.004353148494539162
molt  0.004333909043535392
dannegg  0.00429419577564906
cap  0.0042659394383651825
dot  0.004256088299328521
centinai  0.004174070693772887
abbast  0.004154432804107655
ambient  0.004135583179434125
carabinier  0.004091553267904575


---------- NEWS n° 649
Topic0:
dam  0.009209517289062323
maneskin  0.006628776702533877
dop  0.006461517438197889
cos  0.0060216139676203696
pariett  0.0060053451768268115
alba  0.00596964848883586
amo  0.005926513529346238
caratt  0.0059100618742464256
capezzol 

---------- NEWS n° 660
Topic0:
final  0.0164231197537687
edizion  0.014980873113033432
uomin  0.014220485027168788
dicembr  0.013380318268032203
maneskin  0.013052254479907049
ros  0.012815659170223643
factor  0.012653527838241186
talent  0.012640924846899274
lorenz  0.012607577012438722
viv  0.012559926904647438
numer  0.012514255631691023
samuel  0.0124937223165489


Topic1:
desert  0.013726088533102249
vittor  0.013411017530207847
viv  0.01326799303275555
dam  0.01297382256525704
nuov  0.012923021702276865
prim  0.012921980287207363
concorrent  0.012767899665284002
mar  0.012601550700207164
sheeran  0.012593817297212139
quart  0.012530996937021636
fidanz  0.012508259156518658
sex  0.012380106960541475


---------- NEWS n° 661
Topic0:
sabb  0.006150116021567431
bagnin  0.0060600451990234505
ben  0.00596942099509502
fa  0.005831344221432107
cos  0.005391552365674038
dir  0.00536757395534582
mor  0.005134403174410776
spiagg  0.005059392467485154
bagn  0.004972101581304554
sempr  0.0049

---------- NEWS n° 672
Topic0:
elin  0.01263205797398607
atlet  0.01232217302294022
mes  0.009019511610941888
pos  0.008779643862508207
ucrain  0.008681606767001044
torne  0.008639279139964007
camp  0.008461444058390215
carrier  0.008290432971309939
internazional  0.008269996498135848
success  0.008188096224113758
mar  0.008141397126740136
stat  0.008124780156279212


Topic1:
piac  0.008116206241672835
giust  0.008064224722296927
tennist  0.00804309570579517
import  0.007956844245725765
seduc  0.007728923877969794
osserv  0.007646825706651663
ultim  0.0076380351526407556
dop  0.007624147399069022
rivist  0.007546640669349785
card  0.007486022003962277
mond  0.007462101283546484
cort  0.00745268161411099


---------- NEWS n° 673
Topic0:
sinistr  0.012600439201129774
inter  0.0117567222916921
problem  0.010133054997621762
allen  0.009898086438387655
recuper  0.009886809587692169
napol  0.009766544874778871
scudett  0.00965671324594654
affront  0.00958725733388462
sfid  0.0094587005911333

---------- NEWS n° 684
Topic0:
ognun  0.009288217495960474
vest  0.009187070705714437
strad  0.009063946552638385
fil  0.00903511587501651
amat  0.009013016682726244
meravigl  0.00900141265347077
parl  0.00888375417969442
so  0.008781201399957387
squadr  0.0087453225876121
uso  0.008702066024894718
vit  0.008619421120204102
anni  0.00850645810566069


Topic1:
fa  0.00938567129893562
vit  0.008992438061233655
avvicin  0.008734211256435713
qual  0.008710330433235621
innov  0.008702926201088394
italian  0.008611919935258033
paes  0.008606131925193324
squadr  0.008551099628048783
vogl  0.008467702263997973
person  0.008391885135796586
figl  0.008346977151882179
pettin  0.008342396578714948


---------- NEWS n° 685
Topic0:
libr  0.0114039948358767
scrittor  0.009579088087683218
classif  0.009137960812269636
lettur  0.009009809794106906
traduzion  0.008855175241934738
traduttor  0.008614947034443413
second  0.008560512785728079
oper  0.008535521702085278
vot  0.008390812804281349
anno  0.008

---------- NEWS n° 696
Topic0:
illegal  0.014212760335664628
impost  0.013873287587645567
apert  0.013714509232747642
tutt  0.01362770115705509
sistem  0.013590511244721054
protett  0.01303478919385186
ricev  0.012930668174633678
pot  0.012646690246716866
iniz  0.012625396125943818
virtual  0.012569329030490392
fiduc  0.012534395407097857
mod  0.012394030035614617


Topic1:
bitcoin  0.018471912720898126
monet  0.015378533889429776
controll  0.014437478810334203
senz  0.01438666788183874
transazion  0.014338326881228124
scamb  0.013713762915696103
partecip  0.013647623544729816
possibil  0.013326663458282297
riconosc  0.01332625699418187
mod  0.013097101603680154
fatt  0.013086421965498666
sistem  0.012767675137892209


---------- NEWS n° 697
Topic0:
quotidian  0.009030743679510836
bett  0.00882837030443223
anni  0.008666055467808229
passegger  0.008608965795189048
hostess  0.008555932330241474
vol  0.008394539339949391
signor  0.008260207571115967
rispond  0.008122566954715221
ved  0.0

---------- NEWS n° 708
Topic0:
diamant  0.004529854046713828
cener  0.004526677464471663
anni  0.004157580013041308
poi  0.004071635826213054
car  0.003958229209167924
mort  0.003907385664287285
dop  0.0038773685204183195
sembr  0.003861475237128394
client  0.003795420020280146
cos  0.003782510546682487
gramm  0.0037613089311332236
gioiell  0.003739451475947034


Topic1:
fisic  0.004032539302681333
element  0.0038505382125222087
graf  0.003823208412944531
abbandon  0.003811563782114706
terrestr  0.0037575051935764894
ital  0.00374243793373544
ogni  0.003661948506011742
darl  0.003660726420384508
butt  0.0036540665618172725
confront  0.0036370665765246457
person  0.0036268708362080498
malincon  0.0036266997085592197


---------- NEWS n° 709
Topic0:
leopard  0.011090923088425348
grand  0.010442718563195258
pessim  0.008004131205204082
portabors  0.007727780033626306
manzon  0.007695668991428415
professor  0.007676449236694295
altro  0.007675727035117072
sol  0.007570777155085623
appunt  

---------- NEWS n° 720
Topic0:
centrist  0.026947813152679134
miglior  0.024758662782721878
democraz  0.02450265622337232
dilemm  0.023629406492237017
attanagl  0.023359218769603042
europ  0.023060359848671785
udc  0.02300878402099317
popolar  0.02270835277729399
vann  0.02212470509612475
civic  0.02206896778445788
autonom  0.02201210802927717
vist  0.021845283028364748


Topic1:
solidal  0.02709959637578164
strateg  0.026186123798483715
ferdin  0.02507508276817789
autonom  0.02409463272349111
popol  0.023651073008048532
direzion  0.023583026199528644
grand  0.02349369313182672
different  0.023401211052076373
gall  0.023380575634678692
moltepl  0.02288842103927492
sfumatur  0.0224319191056322
popolar  0.022012142672222325


---------- NEWS n° 721
Topic0:
impegnin  0.00433536287305011
palestines  0.00433064314612775
bruc  0.004316979955343622
rispond  0.004285910150341677
ebre  0.004240434842108067
poi  0.004205388215742962
onu  0.0041866917628177605
port  0.004135794795869937
premier  

---------- NEWS n° 732
Topic0:
effett  0.013335275791356913
habb  0.01272776792822266
italian  0.01259382420843061
onor  0.012585432913248668
sent  0.012410791906955326
gall  0.012327055492282137
cadrebber  0.01222295136054491
infin  0.012158146543863789
stor  0.012118715057227092
sbarc  0.011987599124274311
cant  0.01187162320187005
bomb  0.011845468623053733


Topic1:
mut  0.012982620391010008
divers  0.012958235260951037
allor  0.012759774307482251
sbarc  0.012564191521016073
bicchier  0.012334557249823372
pronunc  0.012284725005158006
scuol  0.012076950430888812
effett  0.012013546839129493
rodar  0.011927041743804684
vist  0.011915050891153767
cum  0.011902107224452157
restitu  0.011883048195707449


---------- NEWS n° 733
Topic0:
nient  0.006333764286064656
autorizz  0.006323660826789689
tutt  0.006296324483859016
organizz  0.005910812962329511
indagin  0.005883762060388242
mifsud  0.005855813091301296
person  0.0058346675896436065
interess  0.005812878940716966
dic  0.0057606025

In [46]:
# Clean again the words... 740 articoli per 2194 sec

more_stopwords = ['me', 'poi', 'fin', 'dop', 'quand', 'aver', 'senz', 'cos', 'due', 'molt', 'nuov', 'fa', 'esser',
                  'sol', 'mai', 'può', 'prim', 'tant', 'second', 'grand', 'fin', 'via', 'piccol', 'ora', 'sempr', 
                  'tre', 'dic', 'ancor', 'propr', 'mes', 'anno', 'anni', 'scors', 'far', 'arriv', 'sembr', 'circ'
                 'port', 'temp', 'post', 'oltre', 'stess', 'tutt', 'volt',  'po', 
                  'mentr', 'altri', 'invec', 'fatt', 'giorn', 'cas', 'oggi',] 

# Time, size, auxiliar verbs... Extra words 'fatt', 'giorn', 'cas', 'oggi']
    
    

for i in range(0, len(lista_topic)):
    
    word_tokens = word_tokenize(lista_topic[i])
    filtered_text = [w for w in word_tokens if not w in more_stopwords]
    lista_topic[i] = ' '.join(word for word in filtered_text)
    
print(lista_topic)

['pot fam nient vol dov mort altri omicid capac colpevol mal', 'riin bagarell maf dir ignor cap capitol lett ginocc pens', 'imam nacional stat barcellon es rambl cni abouyaakoub terror agent stralc', 'stralc quattr nitroglicerin quind contatt spagnol comunqu ambient elimin organizz enne', 'graz altra unveiling musk californ jolly fors camion colp batter', 'liquid obblig californ spost distrarr tecnolog nessun cart prestazion inventor', 'ner louis linton pell tesor domin fiss lung web mnuchin', 'maggior gang tesor compars vest iron diciottenn cert guant risult ultim comment', 'direttor espert omonim stagion creat lung estrazion per ball color persin strizz', 'cappott divent stamp color conviv stil fior abit oggett cap', 'napol laz assest potrebb part rest sembr vicin chiar', 'inter vicin napol sampdor sorpres med belott favor sembr', 'ier campagn ospit import arterioscler uscit elisabett teres spieg autor', 'raccont stor lettur numer sal novembr ciabatt autor omagg libr parol', 'vist ca

In [49]:
vectorizer = TfidfVectorizer() # use_idf=True, ngram_range=(1, 3)

number_topic =  20
number_words = 15

#------ STARTING LSI PROCEDURE ----------
start_LSI_time = time.time()

X = vectorizer.fit_transform(lista_topic) # with range we can capture latent structure 

lsa = TruncatedSVD(n_components = number_topic, n_iter = 1000) # Gauge the components
lsa.fit(X)

terms = vectorizer.get_feature_names()
out = []
for i, comp in enumerate(lsa.components_):
    termsInComp = zip(terms,comp)
    sortedTerms = sorted(termsInComp, key=lambda x: x[1], reverse=True ) [:number_words]
    
    for term in sortedTerms:
        out.append(term[0])
        
print("--- The LSI procedure took  %s seconds  ---" % (time.time() - start_LSI_time))

# We could sum and/or compare times at this point...

--- The LSI procedure took  10.363251209259033 seconds  ---


In [50]:

index_topic = 1
index_words = 1

print('Topic %s' %index_topic )

for w in out:

    print(str(index_words) + ') ' + w)
    occur = []
    for i in range(0, len(interpreter[w])):
        occur = occur + [[interpreter[w][i]] + [counter[interpreter[w][i]]]] 
    
    occur.sort(key=lambda x: x[1], reverse = True)
    print(occur)
    print('   ')
    
    if index_words == number_words:
        index_topic += 1
        print('Topic %s' %index_topic)
        index_words = 1
    else:
        index_words +=1
    

Topic 1
1) stat
[['stato', 752], ['stata', 419], ['stati', 257], ['state', 98], ['statuto', 3]]
   
2) part
[['parte', 366], ['partito', 95], ['partire', 57], ['partita', 53], ['parti', 33], ['partiti', 24], ['partite', 22], ['partendo', 8], ['parto', 6], ['partono', 4], ['partirà', 3], ['partiva', 2], ['partirono', 2], ['partivano', 2], ['partirebbe', 1], ['parta', 1], ['partano', 1]]
   
3) ital
[['italia', 376], ['italiano', 104], ['italo', 4], ['italici', 2], ['italico', 2], ['italiche', 1]]
   
4) lavor
[['lavoro', 301], ['lavoratori', 59], ['lavorare', 48], ['lavori', 43], ['lavorato', 36], ['lavora', 34], ['lavorando', 20], ['lavorano', 15], ['lavorava', 10], ['lavoravano', 4], ['lavoratrici', 4], ['lavorativo', 3], ['lavoratore', 3], ['lavoratrice', 3], ['lavorati', 3], ['lavoravo', 2], ['lavorativa', 2], ['lavorazione', 2], ['lavorative', 1], ['lavoriamo', 1], ['lavorerà', 1], ['lavorò', 1]]
   
5) mil
[['milano', 253], ['mila', 210], ['milita', 4]]
   
6) polit
[['politica', 

[['serie', 114], ['sera', 80], ['serata', 30], ['serio', 20], ['seria', 12], ['seriamente', 7], ['seri', 4], ['serate', 3], ['sere', 1]]
   
10) uom
[['uomo', 184]]
   
11) accus
[['accusa', 72], ['accuse', 41], ['accusato', 38], ['accusata', 11], ['accusati', 8], ['accusate', 4], ['accusò', 3], ['accusano', 3], ['accusarono', 2], ['accusandola', 2]]
   
12) mil
[['milano', 253], ['mila', 210], ['milita', 4]]
   
13) general
[['generale', 110], ['generali', 10], ['general', 5], ['generalista', 3], ['generalmente', 2], ['generaliste', 1]]
   
14) lavor
[['lavoro', 301], ['lavoratori', 59], ['lavorare', 48], ['lavori', 43], ['lavorato', 36], ['lavora', 34], ['lavorando', 20], ['lavorano', 15], ['lavorava', 10], ['lavoravano', 4], ['lavoratrici', 4], ['lavorativo', 3], ['lavoratore', 3], ['lavoratrice', 3], ['lavorati', 3], ['lavoravo', 2], ['lavorativa', 2], ['lavorazione', 2], ['lavorative', 1], ['lavoriamo', 1], ['lavorerà', 1], ['lavorò', 1]]
   
15) gallitell
[['gallitelli', 16]]
   

In [107]:
#  Saving
with open(path + actual_journal[1] + ' Output', "w") as output:
    output.write("\n".join(map(lambda x: str(x), out)))