In [1]:
from nltk.tokenize import word_tokenize
import re
import nltk
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords
from scipy.spatial import distance
import matplotlib.pyplot
from sentence_transformers import SentenceTransformer

In [2]:
# Strip punctuation
def remove_punctuation(sentence):
    """Return `sentence` without punctuation.

    Every character that is neither a word character (`\\w`, which includes
    letters, digits and the underscore) nor whitespace (`\\s`) is deleted.

    Args:
        sentence: the text to clean.

    Returns:
        A new string with the non-word, non-whitespace characters removed.
    """
    punctuation_pattern = re.compile(r'[^\w\s]')
    return punctuation_pattern.sub('', sentence)

# Remove stop words
def remove_stopwords(words_list, stopwords_path="stop_words_FULL.txt"):
    """Lower-case the given words and drop those listed in the stop-word file.

    Args:
        words_list: iterable of word strings.
        stopwords_path: path of a text file holding one stop word per line.
            Defaults to the file the notebook has always used.

    Returns:
        A list with the lower-cased words of `words_list` that are NOT stop
        words, in their original order.

    Raises:
        OSError: if the stop-word file cannot be opened.
    """
    # The context manager closes the file even when reading fails; a set makes
    # each membership test constant-time instead of a scan over the whole list.
    with open(stopwords_path, "r") as stopwords_file:
        stopword_set = {line.replace('\n', '') for line in stopwords_file}

    lowered_words = (word.lower() for word in words_list)
    return [word for word in lowered_words if word not in stopword_set]
    
# Tokenize the sentence with NLTK, then reduce each noun, verb, adverb and adjective to its lemma
def tokenize_sentences(sentence):
    """Tokenize `sentence`, POS-tag it and lemmatize the content words.

    Tokens are produced by `word_tokenize` and tagged by `nltk.pos_tag`. A
    token is kept only when the first two characters of its tag are 'NN',
    'VB', 'RB' or 'JJ'; it is then lemmatized with the matching WordNet
    part of speech. Tokens carrying any other tag are dropped.

    Args:
        sentence: the text to process.

    Returns:
        The list of lemmas, in sentence order.
    """
    # Tag prefix -> name of the WordNet POS constant looked up on `wn`.
    # The constant is resolved only when a token actually needs it.
    wordnet_pos_names = {'NN': 'NOUN', 'VB': 'VERB', 'RB': 'ADV', 'JJ': 'ADJ'}

    lemmatizer = WordNetLemmatizer()
    lemmas = []
    for token, pos_tag in nltk.pos_tag(word_tokenize(sentence)):
        pos_name = wordnet_pos_names.get(pos_tag[:2])
        if pos_name is None:
            continue
        lemmas.append(lemmatizer.lemmatize(token, pos=getattr(wn, pos_name)))

    return lemmas

# Preprocessing step that chains the text-cleaning operations defined above
def pre_processing(sentence):
    """Run the full text-cleaning pipeline on `sentence`.

    The steps are applied in this order: punctuation removal, tokenization
    with lemmatization, stop-word removal.

    Args:
        sentence: the raw text.

    Returns:
        Whatever `remove_stopwords` returns for the lemmatized tokens.
    """
    without_punctuation = remove_punctuation(sentence)
    lemmas = tokenize_sentences(without_punctuation)
    return remove_stopwords(lemmas)

In [3]:
def definitons_lenght(word):
    """Print and collect the definition length of every WordNet synset of `word`.

    The length is the number of pieces obtained by splitting the definition
    on single space characters.

    Args:
        word: the term looked up with `wn.synsets`.

    Returns:
        One entry per synset, in lookup order; each entry is a one-element
        list holding the tuple `(definition, length)`.
    """
    lengths_per_synset = []

    for synset in wn.synsets(word):
        gloss = synset.definition()
        entry = [(gloss, len(gloss.split(" ")))]

        print("Term {}".format(synset), ":" ,entry)
        print()
        lengths_per_synset.append(entry)

    return lengths_per_synset

In [4]:
# For each concept under study, print a separator and the concept name, then
# let definitons_lenght print the definition length of every synset.
for w in ['Courage', 'Paper', 'Apprehension', 'Sharpener']: 
    print("\n------------------------\n")
    print("Concept: ", w)
    definitons_lenght(w)


------------------------

Concept:  Courage
Term Synset('courage.n.01') : [('a quality of spirit that enables you to face danger or pain without showing fear', 15)]


------------------------

Concept:  Paper
Term Synset('paper.n.01') : [('a material made of cellulose pulp derived mainly from wood or rags or certain grasses', 15)]

Term Synset('composition.n.08') : [('an essay (especially one written as an assignment)', 8)]

Term Synset('newspaper.n.01') : [('a daily or weekly publication on folded sheets; contains news and articles and advertisements', 14)]

Term Synset('paper.n.04') : [('a medium for written communication', 5)]

Term Synset('paper.n.05') : [('a scholarly article describing the results of observations or stating hypotheses', 11)]

Term Synset('newspaper.n.02') : [('a business firm that publishes newspapers', 6)]

Term Synset('newspaper.n.03') : [('the physical object that is the product of a newspaper publisher', 11)]

Term Synset('paper.v.01') : [('cover with paper'

In [5]:
def total_hypernym_paths(word):
    """Print and collect definition lengths along the first hypernym path of each synset.

    For every synset of `word`, only the first path returned by
    `hypernym_paths()` is inspected. Each synset on that path is paired
    with the number of whitespace-separated words in its definition.

    Args:
        word: the term looked up with `wn.synsets`.

    Returns:
        One list per synset of `word`; each list holds `(synset, length)`
        tuples in the order of that first path.
    """
    paths_per_synset = []

    for synset in wn.synsets(word):
        first_path = synset.hypernym_paths()[0]
        path_entries = [
            (node, len(node.definition().split()))
            for node in first_path
        ]

        # NOTE(review): the recorded output shows the synset itself at distance 0,
        # so these distances look measured from the synset rather than from the
        # root; the label is left as it is.
        print("hypernym distance from root: \n", (synset.hypernym_distances()))
        print()
        print(path_entries)
        print()

        paths_per_synset.append(path_entries)

    return paths_per_synset

In [6]:
# For each concept under study, print a separator and the concept name, then
# let total_hypernym_paths print the hypernym information of every synset.
for w in ['Courage', 'Paper', 'Apprehension', 'Sharpener']: 
    print("\n------------------------\n")
    print("Concept: ", w)
    total_hypernym_paths(w)


------------------------

Concept:  Courage
hypernym distance from root: 
 {(Synset('courage.n.01'), 0), (Synset('trait.n.01'), 3), (Synset('attribute.n.02'), 4), (Synset('abstraction.n.06'), 5), (Synset('entity.n.01'), 6), (Synset('spirit.n.03'), 1), (Synset('character.n.03'), 2)}

[(Synset('entity.n.01'), 17), (Synset('abstraction.n.06'), 11), (Synset('attribute.n.02'), 9), (Synset('trait.n.01'), 7), (Synset('character.n.03'), 18), (Synset('spirit.n.03'), 9), (Synset('courage.n.01'), 15)]


------------------------

Concept:  Paper
hypernym distance from root: 
 {(Synset('paper.n.01'), 0), (Synset('matter.n.03'), 3), (Synset('part.n.01'), 3), (Synset('entity.n.01'), 6), (Synset('entity.n.01'), 5), (Synset('relation.n.01'), 4), (Synset('physical_entity.n.01'), 4), (Synset('substance.n.01'), 2), (Synset('material.n.01'), 1), (Synset('abstraction.n.06'), 5)}

[(Synset('entity.n.01'), 17), (Synset('physical_entity.n.01'), 6), (Synset('matter.n.03'), 7), (Synset('substance.n.01'), 11), (

In [7]:
# Find the antonyms of each concept
def find_antonyms(word):
    """Print and return the first antonym of every lemma of `word` that has one.

    Every lemma of every synset of `word` is visited. When a lemma has
    antonyms, only its first antonym is recorded, together with the number
    of whitespace-separated pieces in that antonym's name.

    Args:
        word: the term looked up with `wn.synsets`.

    Returns:
        A list of `(antonym_lemma, name_length)` tuples, in visiting order;
        empty when no lemma has an antonym.
    """
    antonyms = []

    for synset in wn.synsets(word):
        for lemma in synset.lemmas():
            # Look the antonyms up once per lemma instead of three times.
            lemma_antonyms = lemma.antonyms()
            if lemma_antonyms:
                first_antonym = lemma_antonyms[0]
                antonyms.append((first_antonym, len(first_antonym.name().split())))

    print("Antonym : {}".format(antonyms))

    return antonyms

In [8]:
# For each concept under study, print a separator and the concept name, then
# let find_antonyms print the antonyms it found.
for w in ['Courage', 'Paper', 'Apprehension', 'Sharpener']: 
    print("\n------------------------\n")
    print("Concept: ", w)
    find_antonyms(w)


------------------------

Concept:  Courage
Antonym : [(Lemma('cowardice.n.01.cowardice'), 1)]

------------------------

Concept:  Paper
Antonym : []

------------------------

Concept:  Apprehension
Antonym : []

------------------------

Concept:  Sharpener
Antonym : []


In [9]:
# Measure how similar each synset's definition is to the definitions of its direct hypernyms
def similarity_search(word, model=None):
    """Print the average definition similarity between each synset of `word` and its hypernyms.

    For every synset, its definition and the definition of each direct
    hypernym are embedded with `model.encode`, and the cosine similarity
    (1 - cosine distance) of each pair is averaged over the hypernyms. The
    average is printed once per synset, after all hypernyms were processed.
    Synsets without hypernyms only get their header line.

    Args:
        word: the term looked up with `wn.synsets`.
        model: optional object exposing `encode(list_of_sentences)`. When
            omitted, a `SentenceTransformer('all-MiniLM-L6-v2')` is created,
            as before; pass one in to avoid reloading it on every call.

    Returns:
        None; results are printed.
    """
    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')

    for synset in wn.synsets(word):
        concept_definition = synset.definition()

        print("concetto {} :".format(synset))

        hypernyms = synset.hypernyms()
        if not hypernyms:
            # Nothing to average for this synset.
            continue

        total_similarity = 0
        for hypernym in hypernyms:
            # Encode the pair together, exactly as the original did, so the
            # embeddings (and therefore the printed numbers) stay the same.
            embeddings = model.encode([concept_definition, hypernym.definition()])
            total_similarity += 1 - distance.cosine(embeddings[0], embeddings[1])

        # Report only after the loop: printing inside it showed partial sums
        # divided by the full hypernym count.
        print("Average Similarity for hypernyms :", total_similarity / len(hypernyms))
        print()

In [10]:
# For each concept under study, print a separator and the concept name, then
# let similarity_search print the definition similarity with the hypernyms.
for w in ['Courage', 'Paper', 'Apprehension', 'Sharpener']: 
    print("\n------------------------\n")
    print("Concept: ", w)
    similarity_search(w)


------------------------

Concept:  Courage
concetto Synset('courage.n.01') :
Average Similarity for hypernyms : 0.4360363483428955


------------------------

Concept:  Paper
concetto Synset('paper.n.01') :
Average Similarity for hypernyms : 0.22500497102737427

concetto Synset('composition.n.08') :
Average Similarity for hypernyms : 0.4687023162841797

concetto Synset('newspaper.n.01') :
Average Similarity for hypernyms : 0.5600683093070984

concetto Synset('paper.n.04') :
Average Similarity for hypernyms : 0.49245941638946533

concetto Synset('paper.n.05') :
Average Similarity for hypernyms : 0.23190630972385406

concetto Synset('newspaper.n.02') :
Average Similarity for hypernyms : 0.7873624563217163

concetto Synset('newspaper.n.03') :
Average Similarity for hypernyms : 0.31530842185020447

concetto Synset('paper.v.01') :
Average Similarity for hypernyms : 0.5753957033157349

concetto Synset('wallpaper.v.01') :
Average Similarity for hypernyms : 0.40710264444351196


------------