# Most frequent words

In [113]:
#Import libraries
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
import string
import nltk
import spacy

In [None]:
#!python -m spacy download it_core_news_sm #ITALIAN POS TAGGING

In [114]:
#Import the two datasets (both paths are relative to the notebook's working directory)
df = pd.read_csv("transcriptions.csv") # transcriptions of the commercials
df1=pd.read_csv("all_commercials_classified_filtered.csv") # Nice categories of the commercials

In [115]:
# Build the working table in one chain:
#   1. inner-join the two datasets on commercial_id (set as index on both sides),
#   2. keep only the transcriptions with log_prob >= -0.5,
#   3. remove the rows whose nice_class is missing (marked by -1),
#   4. drop the columns that are not needed any more (title, log_prob).
df_total = (
    pd.concat(
        [frame.set_index('commercial_id') for frame in (df, df1)],
        axis=1,
        join='inner',
    )
    .loc[lambda joined: joined['log_prob'] >= -0.5]
    .loc[lambda joined: joined['nice_class'] > -1]
    .drop(['title', 'log_prob'], axis=1)
)

In [116]:
#Remove the punctuation from the transcriptions
def remove_punctuation(text):
    """Return ``text`` with every character of ``string.punctuation`` deleted.

    Values that ``pd.isna`` reports as missing are returned unchanged; any
    other value is converted with ``str`` before the characters are removed.
    Punctuation is deleted, not replaced by a space, so the two sides of an
    apostrophe end up glued together (e.g. "c'è" becomes "cè").
    """
    if pd.isna(text):
        return text
    # Translation table mapping each punctuation character to None (= delete).
    drop_table = str.maketrans('', '', string.punctuation)
    return str(text).translate(drop_table)

# Strip the punctuation from every transcription (missing values are passed through unchanged)
df_total['transcription'] = df_total['transcription'].apply(remove_punctuation)

In [118]:
# Load the Italian spaCy pipeline; the model has to be installed first (see the commented-out download command near the top of the notebook)
nlp = spacy.load("it_core_news_sm")
def lemmatize(testo):
    """Return the lemmas of ``testo`` joined by single spaces.

    Parameters
    ----------
    testo : str
        Text to process with the module-level spaCy pipeline ``nlp``.

    Returns
    -------
    str
        The lemma of every token, in document order, separated by spaces.
        A missing value (``None``/NaN, which ``remove_punctuation`` passes
        through unchanged) is returned as it is instead of being handed to
        spaCy, which only accepts text and would raise on it.
    """
    if pd.isna(testo):
        return testo
    return " ".join(token.lemma_ for token in nlp(testo))
# Replace every transcription with its lemmatized form (one spaCy call per row)
df_total['transcription']=df_total['transcription'].apply(lemmatize)

In [119]:
#Apply the lower method to strings (non-string values are left untouched)
df_total['transcription'] = df_total['transcription'].apply(
    lambda s: s.lower() if isinstance(s, str) else s
)

#Remove the stopwords
# The NLTK list is kept under its own name: rebinding `stopwords` (the imported
# corpus reader) to a plain list made this cell fail with AttributeError as soon
# as it was executed a second time.
# Extra corpus-specific stopwords; entries such as 'cè' and 'doro' are spelled
# without apostrophe because the punctuation has already been deleted.
stopwords_plus=['cè', 'così', 'oh', 'eh', 'sì', 'to', 'po', 'già', 'mai', 'no', 'ce', 'ah', 'allora', 'me', 'doro']
# A set gives constant-time membership tests in the filter below.
italian_stopwords = set(stopwords.words('italian')) | set(stopwords_plus)
df_total['transcription'] = df_total['transcription'].apply(
    lambda x: ' '.join(word for word in str(x).split() if word not in italian_stopwords)
)


In [120]:
#Grouping all by nice_class and lustrum
# groupby yields every (nice_class, lustrum) pair exactly once, so each key is
# simply mapped to the list of transcriptions of its own group.
dictionary = {
    group_key: group['transcription'].tolist()
    for group_key, group in df_total.groupby(['nice_class', 'lustrum'])
}

In [121]:
#Transform the value lists into strings
# A value that is already a string is kept as it is: joining it again would put
# a space between its individual characters, which is what happened whenever
# this cell was executed twice.
dictionary = {
    key: value if isinstance(value, str) else ' '.join(value)
    for key, value in dictionary.items()
}

dictionary[28.0, '1980_1984'] #example

'ehi moto chicco acceleratore raglio specchietto classo cambio perfetto lg corda ora senza bisogno motore brillantina essere pallina parente allina brillantina comodino pallina nome essere brillantina brillantina rimbalzina coloratissimo brillantissimo essere pallina divertitevi brillantina rimbalzina pallina super rimbalzare ogni altro pallina brillantina rimbalzina adesiva personalizzante essere unesclusivo linea g occhio grande grande capello biondi biondi essere candy candy candy candy candy candy candy candy essere fare quattro essere candy candy saltaragno nuovo gioco emozionante attenzione ragno essere cattivo potere salvare animaletto catturare ragno toccare ragnatela voglio provare anchio paurare adesso provo attenzione aiuto emozionante divertente saltaragno nuovo gioco editore giochi bravo bravo tanto bravo prere formelli brillo novello inuntro listantire cogli luntro piatti ora brillare sennuore bravo essere qua essere grande qualità bento core coltello cagliote gamia lasze

In [141]:
#Tokenization and PoS tagging

In [123]:
# Load the Italian spaCy pipeline used by pos_tag_italian below (the same model name is already loaded into `nlp` by the lemmatization cell)
nlp = spacy.load("it_core_news_sm")

def pos_tag_italian(text):
    """Tokenize and POS-tag ``text`` with the module-level spaCy pipeline ``nlp``.

    Returns a list with one ``(token text, POS tag)`` tuple per token, in
    document order; the tag is the token's ``pos_`` attribute.
    """
    return [(tok.text, tok.pos_) for tok in nlp(text)]

In [124]:
# POS-tag the joined text of every (nice_class, lustrum) group.
spots_words = {
    group_key: pos_tag_italian(group_text)
    for group_key, group_text in dictionary.items()
}

spots_words[28.0, '1980_1984'] #example

[('ehi', 'PROPN'),
 ('moto', 'PROPN'),
 ('chicco', 'ADJ'),
 ('acceleratore', 'NOUN'),
 ('raglio', 'ADJ'),
 ('specchietto', 'VERB'),
 ('classo', 'ADJ'),
 ('cambio', 'NOUN'),
 ('perfetto', 'ADJ'),
 ('lg', 'PROPN'),
 ('corda', 'VERB'),
 ('ora', 'ADV'),
 ('senza', 'ADP'),
 ('bisogno', 'NOUN'),
 ('motore', 'NOUN'),
 ('brillantina', 'ADJ'),
 ('essere', 'AUX'),
 ('pallina', 'VERB'),
 ('parente', 'PROPN'),
 ('allina', 'ADJ'),
 ('brillantina', 'PROPN'),
 ('comodino', 'PROPN'),
 ('pallina', 'PROPN'),
 ('nome', 'NOUN'),
 ('essere', 'AUX'),
 ('brillantina', 'PROPN'),
 ('brillantina', 'PROPN'),
 ('rimbalzina', 'NOUN'),
 ('coloratissimo', 'ADJ'),
 ('brillantissimo', 'VERB'),
 ('essere', 'AUX'),
 ('pallina', 'VERB'),
 ('divertitevi', 'NOUN'),
 ('brillantina', 'ADJ'),
 ('rimbalzina', 'NOUN'),
 ('pallina', 'VERB'),
 ('super', 'ADJ'),
 ('rimbalzare', 'VERB'),
 ('ogni', 'DET'),
 ('altro', 'ADJ'),
 ('pallina', 'NOUN'),
 ('brillantina', 'ADJ'),
 ('rimbalzina', 'NOUN'),
 ('adesiva', 'ADJ'),
 ('personalizzan

In [125]:
#Print the `top` most frequent (word, POS) pairs of every nice class and lustrum

top = 10  # number of pairs printed per group (previously assigned but ignored by a hard-coded slice)

# Distinct (word, POS) pairs of every group.
vocabularies = {key: set(spots_words[key]) for key in dictionary}

# Number of occurrences of every (word, POS) pair within each group.
frequencies = {key: Counter(spots_words[key]) for key in dictionary}

for position, key in enumerate(sorted(dictionary)):
    if position:
        print()  # blank line between two consecutive groups
    # most_common orders ties by first occurrence, exactly like the stable
    # descending sort it replaces.
    for word, frequency in frequencies[key].most_common(top):
        print(key, word, frequency, sep='\t')

(1.0, '1980_1984')	('essere', 'AUX')	3
(1.0, '1980_1984')	('chimeral', 'PROPN')	2
(1.0, '1980_1984')	('oggi', 'ADV')	1
(1.0, '1980_1984')	('sabato', 'NOUN')	1
(1.0, '1980_1984')	('finalmente', 'ADV')	1
(1.0, '1980_1984')	('giorno', 'NOUN')	1
(1.0, '1980_1984')	('substral', 'ADJ')	1
(1.0, '1980_1984')	('wow', 'PROPN')	1
(1.0, '1980_1984')	('substral', 'PROPN')	1
(1.0, '1980_1984')	('cuore', 'ADJ')	1

(1.0, '1985_1989')	('essere', 'AUX')	2
(1.0, '1985_1989')	('fare', 'VERB')	2
(1.0, '1985_1989')	('bostik', 'NOUN')	2
(1.0, '1985_1989')	('primavera', 'NOUN')	2
(1.0, '1985_1989')	('vien', 'AUX')	2
(1.0, '1985_1989')	('danzare', 'VERB')	2
(1.0, '1985_1989')	('buongiorno', 'ADJ')	1
(1.0, '1985_1989')	('chiodo', 'VERB')	1
(1.0, '1985_1989')	('piegare', 'VERB')	1
(1.0, '1985_1989')	('spezzare', 'VERB')	1

(2.0, '1980_1984')	('essere', 'AUX')	2
(2.0, '1980_1984')	('25000', 'NUM')	2
(2.0, '1980_1984')	('tanto', 'ADV')	2
(2.0, '1980_1984')	('casacolor', 'NOUN')	2
(2.0, '1980_1984')	('stanza', 'NOU

In [126]:
#Collect the ADJECTIVES of every nice class and lustrum
# spots_words already holds the POS-tagged text of every group (built from the
# same `dictionary` a few cells above), so the slow tagging is not repeated here.
# The tag is compared directly: `'ADJ' in pair` looked at the whole
# (text, tag) tuple and would also have matched a token whose text is "ADJ".
spots_adj = {
    key: [pair for pair in tagged if pair[1] == 'ADJ']
    for key, tagged in spots_words.items()
}

spots_adj[28.0, '1980_1984'] #example

[('chicco', 'ADJ'),
 ('raglio', 'ADJ'),
 ('classo', 'ADJ'),
 ('perfetto', 'ADJ'),
 ('brillantina', 'ADJ'),
 ('allina', 'ADJ'),
 ('coloratissimo', 'ADJ'),
 ('brillantina', 'ADJ'),
 ('super', 'ADJ'),
 ('altro', 'ADJ'),
 ('brillantina', 'ADJ'),
 ('adesiva', 'ADJ'),
 ('unesclusivo', 'ADJ'),
 ('grande', 'ADJ'),
 ('grande', 'ADJ'),
 ('nuovo', 'ADJ'),
 ('emozionante', 'ADJ'),
 ('ragno', 'ADJ'),
 ('cattivo', 'ADJ'),
 ('ragno', 'ADJ'),
 ('provo', 'ADJ'),
 ('divertente', 'ADJ'),
 ('nuovo', 'ADJ'),
 ('bravo', 'ADJ'),
 ('bravo', 'ADJ'),
 ('brillo', 'ADJ'),
 ('inuntro', 'ADJ'),
 ('piatti', 'ADJ'),
 ('bravo', 'ADJ'),
 ('grande', 'ADJ'),
 ('core', 'ADJ'),
 ('lecetto', 'ADJ'),
 ('cromato', 'ADJ'),
 ('metallo', 'ADJ'),
 ('bello', 'ADJ'),
 ('bravo', 'ADJ'),
 ('grande', 'ADJ'),
 ('bravo', 'ADJ'),
 ('bravo', 'ADJ'),
 ('bravo', 'ADJ'),
 ('bravo', 'ADJ'),
 ('bravo', 'ADJ'),
 ('super', 'ADJ'),
 ('videogioco', 'ADJ'),
 ('facile', 'ADJ'),
 ('difficile', 'ADJ'),
 ('fantastico', 'ADJ'),
 ('atari', 'ADJ'),
 ('att

In [127]:
#Print the `top` most frequent ADJECTIVES of every nice class and lustrum

top = 10  # number of adjectives printed per group (previously assigned but ignored by a hard-coded slice)

# Distinct adjectives of every group.
vocabularies = {key: set(spots_adj[key]) for key in dictionary}

# Number of occurrences of every adjective within each group.
frequencies = {key: Counter(spots_adj[key]) for key in dictionary}

for position, key in enumerate(sorted(dictionary)):
    if position:
        print()  # blank line between two consecutive groups
    # most_common orders ties by first occurrence, exactly like the stable
    # descending sort it replaces.
    for word, frequency in frequencies[key].most_common(top):
        print(key, word, frequency, sep='\t')

(1.0, '1980_1984')	('substral', 'ADJ')	1
(1.0, '1980_1984')	('cuore', 'ADJ')	1
(1.0, '1980_1984')	('lunico', 'ADJ')	1
(1.0, '1980_1984')	('chimeral', 'ADJ')	1
(1.0, '1980_1984')	('fiore', 'ADJ')	1
(1.0, '1980_1984')	('splendida', 'ADJ')	1
(1.0, '1980_1984')	('vanitoso', 'ADJ')	1
(1.0, '1980_1984')	('bello', 'ADJ')	1
(1.0, '1980_1984')	('allegria', 'ADJ')	1

(1.0, '1985_1989')	('buongiorno', 'ADJ')	1
(1.0, '1985_1989')	('provo', 'ADJ')	1
(1.0, '1985_1989')	('difficile', 'ADJ')	1
(1.0, '1985_1989')	('adesivo', 'ADJ')	1
(1.0, '1985_1989')	('segreto', 'ADJ')	1
(1.0, '1985_1989')	('gesal', 'ADJ')	1

(2.0, '1980_1984')	('pratico', 'ADJ')	1
(2.0, '1980_1984')	('idropitturo', 'ADJ')	1
(2.0, '1980_1984')	('grande', 'ADJ')	1
(2.0, '1980_1984')	('spazioso', 'ADJ')	1
(2.0, '1980_1984')	('magnifico', 'ADJ')	1
(2.0, '1980_1984')	('lavabile', 'ADJ')	1
(2.0, '1980_1984')	('nuovo', 'ADJ')	1
(2.0, '1980_1984')	('soffitto', 'ADJ')	1
(2.0, '1980_1984')	('compreso', 'ADJ')	1
(2.0, '1980_1984')	('progredito

In [128]:
#Collect the NOUNS of every nice class and lustrum
# spots_words already holds the POS-tagged text of every group (built from the
# same `dictionary` a few cells above), so the slow tagging is not repeated here.
# The tag is compared directly: `'NOUN' in pair` looked at the whole
# (text, tag) tuple and would also have matched a token whose text is "NOUN".
spots_noun = {
    key: [pair for pair in tagged if pair[1] == 'NOUN']
    for key, tagged in spots_words.items()
}

spots_noun[28.0, '1980_1984'] #example

[('acceleratore', 'NOUN'),
 ('cambio', 'NOUN'),
 ('bisogno', 'NOUN'),
 ('motore', 'NOUN'),
 ('nome', 'NOUN'),
 ('rimbalzina', 'NOUN'),
 ('divertitevi', 'NOUN'),
 ('rimbalzina', 'NOUN'),
 ('pallina', 'NOUN'),
 ('rimbalzina', 'NOUN'),
 ('linea', 'NOUN'),
 ('occhio', 'NOUN'),
 ('capello', 'NOUN'),
 ('biondi', 'NOUN'),
 ('saltaragno', 'NOUN'),
 ('gioco', 'NOUN'),
 ('attenzione', 'NOUN'),
 ('potere', 'NOUN'),
 ('ragnatela', 'NOUN'),
 ('anchio', 'NOUN'),
 ('attenzione', 'NOUN'),
 ('aiuto', 'NOUN'),
 ('saltaragno', 'NOUN'),
 ('gioco', 'NOUN'),
 ('editore', 'NOUN'),
 ('giochi', 'NOUN'),
 ('formelli', 'NOUN'),
 ('luntro', 'NOUN'),
 ('sennuore', 'NOUN'),
 ('qualità', 'NOUN'),
 ('bento', 'NOUN'),
 ('cagliote', 'NOUN'),
 ('gamia', 'NOUN'),
 ('laszello', 'NOUN'),
 ('sennuore', 'NOUN'),
 ('qualità', 'NOUN'),
 ('limone', 'NOUN'),
 ('paglietta', 'NOUN'),
 ('limone', 'NOUN'),
 ('cassetta', 'NOUN'),
 ('gioco', 'NOUN'),
 ('tv', 'NOUN'),
 ('punto', 'NOUN'),
 ('atari', 'NOUN'),
 ('wow', 'NOUN'),
 ('cancell

In [129]:
#Print the `top` most frequent NOUNS of every nice class and lustrum

top = 10  # number of nouns printed per group (previously assigned but ignored by a hard-coded slice)

# Distinct nouns of every group.
vocabularies = {key: set(spots_noun[key]) for key in dictionary}

# Number of occurrences of every noun within each group.
frequencies = {key: Counter(spots_noun[key]) for key in dictionary}

for position, key in enumerate(sorted(dictionary)):
    if position:
        print()  # blank line between two consecutive groups
    # most_common orders ties by first occurrence, exactly like the stable
    # descending sort it replaces.
    for word, frequency in frequencies[key].most_common(top):
        print(key, word, frequency, sep='\t')

(1.0, '1980_1984')	('sabato', 'NOUN')	1
(1.0, '1980_1984')	('giorno', 'NOUN')	1
(1.0, '1980_1984')	('foglia', 'NOUN')	1
(1.0, '1980_1984')	('substral', 'NOUN')	1
(1.0, '1980_1984')	('piante', 'NOUN')	1

(1.0, '1985_1989')	('bostik', 'NOUN')	2
(1.0, '1985_1989')	('primavera', 'NOUN')	2
(1.0, '1985_1989')	('cocrepe', 'NOUN')	1
(1.0, '1985_1989')	('disastro', 'NOUN')	1
(1.0, '1985_1989')	('servo', 'NOUN')	1
(1.0, '1985_1989')	('assembler', 'NOUN')	1
(1.0, '1985_1989')	('bisogno', 'NOUN')	1
(1.0, '1985_1989')	('cura', 'NOUN')	1
(1.0, '1985_1989')	('liquido', 'NOUN')	1
(1.0, '1985_1989')	('pianta', 'NOUN')	1

(2.0, '1980_1984')	('casacolor', 'NOUN')	2
(2.0, '1980_1984')	('stanza', 'NOUN')	2
(2.0, '1980_1984')	('colore', 'NOUN')	2
(2.0, '1980_1984')	('prova', 'NOUN')	1
(2.0, '1980_1984')	('lira', 'NOUN')	1
(2.0, '1980_1984')	('potere', 'NOUN')	1
(2.0, '1980_1984')	('superficie', 'NOUN')	1
(2.0, '1980_1984')	('tecnologia', 'NOUN')	1
(2.0, '1980_1984')	('meyer', 'NOUN')	1

(3.0, '1980_1984')	(

## Most frequent words in LUSTRA

In [131]:
#Grouping all by lustrum
# From here on `dictionary` is keyed by lustrum only (no longer by
# (nice_class, lustrum)); each value is the list of that lustrum's transcriptions.
dictionary = (
    df_total
    .groupby('lustrum')['transcription']
    .apply(list)
    .to_dict()
)

In [132]:
# Preview: the list of transcriptions grouped under the 1980_1984 lustrum
dictionary['1980_1984']

['essere persona potere permettere stare casa quando fare brutto tempo primo sintomo raffreddore influenza presto aspirinare aspirina andare bene presto seguire attentamente avvertenza modalità duso',
 'qui volere black decker potere forare materiale accessorio trasformare sega circolare perfetto levigatrice dirai lho fatto black decker accessorio trapano black decker partire 33200 lira natale gamma black decker trovare sempre regalo utile regalo durare sempre black decker',
 'carciofo ricco qualità naturale cynar aperitivo base carciofo dopo pranzo cynar liscio bere liscio cynar essere ottimo amaro scelta naturale',
 'volere raffreddore uninfluenza guardare avere qualcosa te cebiopirina effervescente vitamina raffreddore influenza male testa primo sintomo cebiopirina effervescente vitamina cebiopirina effervescente cebiopirina effervescente essere prodotto bracco seguire attentamente avvertenza modalità duso',
 'linea graziella simpatia due ruota buongiorno fabio ciao fabio linea graz

In [133]:
# Join the transcriptions of every lustrum into one space-separated text.
# A value that is already a string is kept as it is: joining it again would put
# a space between its individual characters, which is what happened whenever
# this cell was executed twice.
dictionary = {
    key: value if isinstance(value, str) else ' '.join(value)
    for key, value in dictionary.items()
}

In [134]:
# Preview: the joined text of the 1980_1984 lustrum
dictionary['1980_1984']

'essere persona potere permettere stare casa quando fare brutto tempo primo sintomo raffreddore influenza presto aspirinare aspirina andare bene presto seguire attentamente avvertenza modalità duso qui volere black decker potere forare materiale accessorio trasformare sega circolare perfetto levigatrice dirai lho fatto black decker accessorio trapano black decker partire 33200 lira natale gamma black decker trovare sempre regalo utile regalo durare sempre black decker carciofo ricco qualità naturale cynar aperitivo base carciofo dopo pranzo cynar liscio bere liscio cynar essere ottimo amaro scelta naturale volere raffreddore uninfluenza guardare avere qualcosa te cebiopirina effervescente vitamina raffreddore influenza male testa primo sintomo cebiopirina effervescente vitamina cebiopirina effervescente cebiopirina effervescente essere prodotto bracco seguire attentamente avvertenza modalità duso linea graziella simpatia due ruota buongiorno fabio ciao fabio linea graziella simpatia du

In [135]:
# POS-tag the joined text of every lustrum.
spots_words = {
    lustrum: pos_tag_italian(lustrum_text)
    for lustrum, lustrum_text in dictionary.items()
}
spots_words['1980_1984']

[('essere', 'AUX'),
 ('persona', 'VERB'),
 ('potere', 'NOUN'),
 ('permettere', 'VERB'),
 ('stare', 'VERB'),
 ('casa', 'NOUN'),
 ('quando', 'SCONJ'),
 ('fare', 'VERB'),
 ('brutto', 'ADJ'),
 ('tempo', 'NOUN'),
 ('primo', 'ADJ'),
 ('sintomo', 'NOUN'),
 ('raffreddore', 'ADJ'),
 ('influenza', 'NOUN'),
 ('presto', 'ADV'),
 ('aspirinare', 'VERB'),
 ('aspirina', 'VERB'),
 ('andare', 'VERB'),
 ('bene', 'ADV'),
 ('presto', 'ADV'),
 ('seguire', 'VERB'),
 ('attentamente', 'ADV'),
 ('avvertenza', 'ADJ'),
 ('modalità', 'NOUN'),
 ('duso', 'ADJ'),
 ('qui', 'ADV'),
 ('volere', 'VERB'),
 ('black', 'ADJ'),
 ('decker', 'NOUN'),
 ('potere', 'NOUN'),
 ('forare', 'VERB'),
 ('materiale', 'NOUN'),
 ('accessorio', 'ADJ'),
 ('trasformare', 'VERB'),
 ('sega', 'NOUN'),
 ('circolare', 'ADJ'),
 ('perfetto', 'ADJ'),
 ('levigatrice', 'NOUN'),
 ('dirai', 'VERB'),
 ('lho', 'ADV'),
 ('fatto', 'NOUN'),
 ('black', 'ADJ'),
 ('decker', 'NOUN'),
 ('accessorio', 'ADJ'),
 ('trapano', 'VERB'),
 ('black', 'ADJ'),
 ('decker', 'NOU

In [136]:
#Print the `top` most frequent (word, POS) pairs of every lustrum

top = 10  # number of pairs printed per lustrum (previously assigned but ignored by a hard-coded slice)

# Distinct (word, POS) pairs of every lustrum.
vocabularies = {key: set(spots_words[key]) for key in dictionary}

# Number of occurrences of every (word, POS) pair within each lustrum.
frequencies = {key: Counter(spots_words[key]) for key in dictionary}

for position, key in enumerate(sorted(dictionary)):
    if position:
        print()  # blank line between two consecutive lustra
    # most_common orders ties by first occurrence, exactly like the stable
    # descending sort it replaces.
    for word, frequency in frequencies[key].most_common(top):
        print(key, word, frequency, sep='\t')

1980_1984	('essere', 'AUX')	813
1980_1984	('fare', 'VERB')	190
1980_1984	('avere', 'VERB')	151
1980_1984	('nuovo', 'ADJ')	149
1980_1984	('bello', 'ADJ')	99
1980_1984	('volere', 'VERB')	82
1980_1984	('grande', 'ADJ')	82
1980_1984	('potere', 'NOUN')	81
1980_1984	('quando', 'SCONJ')	77
1980_1984	('sempre', 'ADV')	71

1985_1989	('essere', 'AUX')	2376
1985_1989	('nuovo', 'ADJ')	527
1985_1989	('fare', 'VERB')	524
1985_1989	('avere', 'VERB')	515
1985_1989	('grande', 'ADJ')	307
1985_1989	('oggi', 'ADV')	293
1985_1989	('solo', 'ADV')	291
1985_1989	('potere', 'NOUN')	278
1985_1989	('volere', 'VERB')	247
1985_1989	('bello', 'ADJ')	225

1990_1994	('essere', 'AUX')	2123
1990_1994	('nuovo', 'ADJ')	604
1990_1994	('fare', 'VERB')	552
1990_1994	('avere', 'VERB')	543
1990_1994	('oggi', 'ADV')	297
1990_1994	('solo', 'ADV')	282
1990_1994	('grande', 'ADJ')	272
1990_1994	('potere', 'NOUN')	242
1990_1994	('avere', 'AUX')	235
1990_1994	('ogni', 'DET')	232

1995_1999	('essere', 'AUX')	815
1995_1999	('avere', '

In [137]:
#Collect the ADJECTIVES of every lustrum
# spots_words already holds the POS-tagged text of every lustrum (built from the
# same `dictionary` a few cells above), so the slow tagging is not repeated here.
# The tag is compared directly: `'ADJ' in pair` looked at the whole
# (text, tag) tuple and would also have matched a token whose text is "ADJ".
spots_adj = {
    key: [pair for pair in tagged if pair[1] == 'ADJ']
    for key, tagged in spots_words.items()
}

In [138]:
#Print the `top` most frequent ADJECTIVES of every lustrum

top = 10  # number of adjectives printed per lustrum (previously assigned but ignored by a hard-coded slice)

# Distinct adjectives of every lustrum.
vocabularies = {key: set(spots_adj[key]) for key in dictionary}

# Number of occurrences of every adjective within each lustrum.
frequencies = {key: Counter(spots_adj[key]) for key in dictionary}

for position, key in enumerate(sorted(dictionary)):
    if position:
        print()  # blank line between two consecutive lustra
    # most_common orders ties by first occurrence, exactly like the stable
    # descending sort it replaces.
    for word, frequency in frequencies[key].most_common(top):
        print(key, word, frequency, sep='\t')

1980_1984	('nuovo', 'ADJ')	149
1980_1984	('bello', 'ADJ')	99
1980_1984	('grande', 'ADJ')	82
1980_1984	('difficile', 'ADJ')	61
1980_1984	('fresco', 'ADJ')	53
1980_1984	('buono', 'ADJ')	42
1980_1984	('bianco', 'ADJ')	38
1980_1984	('primo', 'ADJ')	31
1980_1984	('piccolo', 'ADJ')	28
1980_1984	('delicato', 'ADJ')	28

1985_1989	('nuovo', 'ADJ')	527
1985_1989	('grande', 'ADJ')	307
1985_1989	('bello', 'ADJ')	225
1985_1989	('primo', 'ADJ')	190
1985_1989	('vero', 'ADJ')	146
1985_1989	('buono', 'ADJ')	132
1985_1989	('bianco', 'ADJ')	129
1985_1989	('fresco', 'ADJ')	110
1985_1989	('naturale', 'ADJ')	100
1985_1989	('dolce', 'ADJ')	100

1990_1994	('nuovo', 'ADJ')	604
1990_1994	('grande', 'ADJ')	272
1990_1994	('bello', 'ADJ')	190
1990_1994	('bianco', 'ADJ')	173
1990_1994	('primo', 'ADJ')	165
1990_1994	('fresco', 'ADJ')	136
1990_1994	('buono', 'ADJ')	127
1990_1994	('naturale', 'ADJ')	123
1990_1994	('vero', 'ADJ')	102
1990_1994	('dolce', 'ADJ')	79

1995_1999	('nuovo', 'ADJ')	162
1995_1999	('bello', 'ADJ

In [139]:
#Collect the NOUNS of every lustrum
# spots_words already holds the POS-tagged text of every lustrum (built from the
# same `dictionary` a few cells above), so the slow tagging is not repeated here.
# The tag is compared directly: `'NOUN' in pair` looked at the whole
# (text, tag) tuple and would also have matched a token whose text is "NOUN".
spots_noun = {
    key: [pair for pair in tagged if pair[1] == 'NOUN']
    for key, tagged in spots_words.items()
}

In [140]:
#Print the `top` most frequent NOUNS of every lustrum

top = 20  # number of nouns printed per lustrum (previously assigned but ignored by a hard-coded slice)

# Distinct nouns of every lustrum.
vocabularies = {key: set(spots_noun[key]) for key in dictionary}

# Number of occurrences of every noun within each lustrum.
frequencies = {key: Counter(spots_noun[key]) for key in dictionary}

for position, key in enumerate(sorted(dictionary)):
    if position:
        print()  # blank line between two consecutive lustra
    # most_common orders ties by first occurrence, exactly like the stable
    # descending sort it replaces.
    for word, frequency in frequencies[key].most_common(top):
        print(key, word, frequency, sep='\t')

1980_1984	('potere', 'NOUN')	81
1980_1984	('piacere', 'NOUN')	62
1980_1984	('sapore', 'NOUN')	49
1980_1984	('casa', 'NOUN')	46
1980_1984	('giorno', 'NOUN')	38
1980_1984	('caffè', 'NOUN')	37
1980_1984	('oro', 'NOUN')	35
1980_1984	('shampoo', 'NOUN')	35
1980_1984	('bambino', 'NOUN')	34
1980_1984	('colore', 'NOUN')	33
1980_1984	('cosa', 'NOUN')	31
1980_1984	('pelle', 'NOUN')	30
1980_1984	('qualità', 'NOUN')	29
1980_1984	('mondo', 'NOUN')	28
1980_1984	('piatto', 'NOUN')	26
1980_1984	('computer', 'NOUN')	26
1980_1984	('latte', 'NOUN')	25
1980_1984	('dovere', 'NOUN')	24
1980_1984	('birra', 'NOUN')	24
1980_1984	('cuore', 'NOUN')	24

1985_1989	('potere', 'NOUN')	278
1985_1989	('giorno', 'NOUN')	187
1985_1989	('piacere', 'NOUN')	168
1985_1989	('casa', 'NOUN')	151
1985_1989	('cosa', 'NOUN')	137
1985_1989	('caffè', 'NOUN')	124
1985_1989	('vita', 'NOUN')	112
1985_1989	('sapore', 'NOUN')	109
1985_1989	('colore', 'NOUN')	108
1985_1989	('cuore', 'NOUN')	94
1985_1989	('pelle', 'NOUN')	94
1985_1989	('f