### Imports

In [4]:
!pip install spacy sentence-transformers
!python -m spacy download en_core_web_sm

Collecting sentence-transformers
  Downloading sentence_transformers-3.0.0-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.7/224.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence-transformers)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence-transform

In [5]:
import nltk
import string
from nltk import pos_tag
from nltk.tokenize import word_tokenize
from nltk.corpus import treebank
from nltk.tree import Tree
from nltk.corpus import stopwords


# Download required resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('treebank')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package treebank to /root/nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
import string
import spacy
from transformers import AutoTokenizer, AutoModel, AutoModelForTokenClassification, pipeline
from sentence_transformers import SentenceTransformer, util
import numpy as np

### Utility functions

In [7]:
def remove_punctuation(text):
    """
    Removal of punction marks if any.

    Args:
    - text (str): String of a statement.

    Returns:
    - _ (str): String without punctuation marks.
    """

    return text.translate(str.maketrans('', '', string.punctuation))

In [8]:
# load bert NER model and create pipeline for NER
tokenizer = AutoTokenizer.from_pretrained("dslim/bert-large-NER")
model = AutoModelForTokenClassification.from_pretrained("dslim/bert-large-NER")

# creation of pipeline
ner = pipeline("ner", model=model,  tokenizer=tokenizer,  aggregation_strategy="first")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/40.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/1.45k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-large-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [9]:
def bert_ner(text):
    """
    Retrieval of Named Entity Recognition(NER) Tags from BERT pretrained model.

    Args:
    - text (str): String of a statement.

    Returns:
    - ent (dict): Dictionary of a word mapped with it's NER tag if any.
    """

    # initialization
    ent = {}
    # get result from the pipeline
    ner_results = ner(text)
    # append the required result to dictionary
    for e in ner_results:
        ent[e['word']] = e['entity_group']
    return ent

In [10]:
def aggregate_phrases(text):
    """
    Aggregation of related words and convert them into phrases for easy retrieval of answers.

    Args:
    - text (str): String of a statement.

    Returns:
    - noun_phrases (list): List of chunk of words that represents a noun.
    - verb_phrases (list): List of chunk of words that represents a verb.
    """

    doc = nlp(text)
    noun_phrases = [chunk.text for chunk in doc.noun_chunks]
    verb_phrases = [token.lemma_ for token in doc if token.pos_ == "VERB"]
    return noun_phrases, verb_phrases

In [11]:
def perform_pos_tagging(text):
    """
    Retrieval of Parts Of Speech (POS) Tags from spacy pretrained model.

    Args:
    - text (str): String of a statement.

    Returns:
    - pos_tags (dict): Dictionary of a word mapped with it's POS tag.
    """

    doc = nlp(text)
    pos_tags = {token.text: token.pos_ for token in doc}
    return pos_tags

In [12]:
def perform_dependency_parsing(text):
    """
    Retrieval of dependency parse labels from spacy pretrained model.

    Args:
    - text (str): String of a statement.

    Returns:
    - dependencies (dict): Dictionary of a word mapped with it's dependency label.
    - heads (dict): Dictionary of a word mapped with it's head word in dependency parsing.
    """

    doc = nlp(text)
    # storing dependencies and it's heads separately
    dependencies = {token.text: token.dep_ for token in doc}
    heads = {token.text: token.head.text for token in doc}

    # optional
    # Visualize the dependency tree
    # displacy.render(doc, style='dep', jupyter=True)

    return dependencies, heads

In [13]:
# Load SpaCy model for dependency parsing and NER
nlp = spacy.load("en_core_web_sm")

# Load pre-trained Sentence Transformer model for embeddings
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [14]:
def get_embedding(text):
    """
    Implementation of text embedding using sentence transformer pretrained model.

    Args:
    - text (str): String of question/statement.

    Returns:
    - encoded_text (list): List of floating point values obtained after encoding the text.
    """

    encoded_text = sentence_model.encode(text, convert_to_tensor=True)
    return encoded_text

In [15]:
def cosine_similarity(embedding1, embedding2):
    """
    Implementation of cosine similarity.

    Args:
    - embedding1 (list): Array of embedding of question.
    - embedding2 (list): Array of embedding of sentences.

    Returns:
    - similarity (float): Value of cosine similarity between embedding of a particular statement and a question.
    """

    similarity = util.pytorch_cos_sim(embedding1, embedding2).item()
    return similarity

### Prime function for semantic role labeling

In [16]:
def semantic_roles(s):
    """
    Implementation of Semantic Role Labeling system.

    Args:
    - s (str): String of sentence from which semantic roles must be extracted.

    Returns:
    - roles (dict): Dictionary of semantic roles mapped to the relevant phrase.
    """

    # initialization
    roles = {}

    # preprocessing - only removing punctuation as all other features are necessary to understand the sentence
    s = remove_punctuation(s)

    # NER using bert
    entities = bert_ner(s)
    # Dependency parsing using spacy
    dependencies, heads = perform_dependency_parsing(s)
    # Word aggregation: Words -> Phrases
    aggregates, _ = aggregate_phrases(s)
    # POS tagging
    pos_tagging = perform_pos_tagging(s)

    # tokenization
    doc = nlp(s)
    # finding phrases for nouns if exists
    noun_phrases = [chunk.text for chunk in doc.noun_chunks]
    # finding phrases for words if exists
    word_to_phrase = {word: phrase for phrase in noun_phrases for word in phrase.split()}

    # iteration through aggregates of words i.e, phrases
    for agg in aggregates:
        words = agg.split()
        entity_label = None
        for word in words:
            if word in entities:
                entity_label = entities[word]
                break
        # extract semantic roles based on Named Entities if present
        if entity_label:
            for word in words:
                dep = dependencies.get(word)
                pos = pos_tagging.get(word)
                head = heads.get(word)
                phrase = word_to_phrase.get(word, word)

                if entity_label == 'PER':
                    if pos in ['NOUN', 'PRON', 'PROPN']:
                        if dep == 'nsubj':
                            roles['Agent'] = phrase
                        elif dep in ['dobj', 'pobj']:
                            roles['Patient'] = phrase
                elif entity_label in ['LOC']:
                    if pos in ['NOUN', 'PRON', 'PROPN']:
                        if dep in ['dobj', 'pobj']:
                            roles['Destination'] = phrase
                        elif dep == 'nsubj':
                            roles['Source'] = phrase
                elif entity_label in ['MISC']:
                    if pos in ['NOUN', 'PRON', 'PROPN']:
                        if dep in ['compound', 'pobj']:
                            roles['Instrument'] = phrase
        else:
            # extract semantic roles based on POS tags and Dependency tags
            for word in words:
                dep = dependencies.get(word)
                pos = pos_tagging.get(word)
                head = heads.get(word)
                phrase = word_to_phrase.get(word, word)

                if pos in ['NOUN', 'PRON', 'PROPN']:
                    if dep in ['nsubj','compound','nsubjpass']:
                        if 'Agent' not in roles:
                            roles['Agent'] = phrase
                    elif dep == 'dobj':
                        roles['Patient'] = phrase
                    elif dep == 'pobj':
                        if head == 'with':
                            roles['Instrument'] = phrase
                        elif head == 'for':
                            roles['Beneficiary'] = phrase
                        elif head == 'from':
                            roles['Source'] = phrase
                        elif head == 'to' :
                            roles['Destination'] = phrase
                        elif head == 'by':
                            roles['Patient'] = phrase
                elif pos in ['VERB']:
                    roles['Predicate'] = phrase

    # Handle prepositional phrases separately
    for token in doc:
        phrase = word_to_phrase.get(token.text, token.text)

        # Extract semantic roles based on POS tags
        if token.pos_ == 'NOUN':
            if token.dep_ == 'nsubj' and token.head.dep_ == 'ROOT':
                if 'Agent' not in roles:
                    roles['Agent'] = phrase
            elif token.dep_ == 'dobj':
                roles['Patient'] = phrase

        # Extract semantic roles based on dependency labels
        if token.dep_ == 'ROOT':
            roles['Predicate'] = token.lemma_
        elif token.dep_ == 'prep' and token.text in ['to','in']:
            for child in token.children:
                if child.dep_ == 'pobj':
                    roles['Destination'] = word_to_phrase.get(child.text, child.text)
        elif token.dep_ == 'prep' and token.text == 'with':
            for child in token.children:
                if child.dep_ == 'pobj':
                    roles['Instrument'] = word_to_phrase.get(child.text, child.text)
        elif token.dep_ == 'prep' and token.text == 'for':
            for child in token.children:
                if child.dep_ == 'pobj':
                    roles['Beneficiary'] = word_to_phrase.get(child.text, child.text)
        elif token.dep_ == 'prep' and token.text in ['on', 'in']:
            for child in token.children:
                if child.dep_ in ['pobj', 'dobj']:
                    roles['Patient'] = word_to_phrase.get(child.text, child.text)
        elif token.dep_ == 'prep' and token.text == 'from':
            for child in token.children:
                if child.dep_ == 'pobj':
                    roles['Source'] = word_to_phrase.get(child.text, child.text)

    return roles

### Application of semantic role labeling: Question Answering system

In [31]:
def answer_question(question, text):
    """
    Implementation of QA system.

    Args:
    - question (str): String of question.
    - text (str): String of paragraph on which question is asked.

    Returns:
    - answer (str): String of relevant phrase from the paragraph for the given question.
    """

    # Tokenization and dependency parsing for both the question and the text passage
    question_doc = nlp(question)
    text_doc = nlp(text)

    # get embeddings for the text and question
    question_embedding = get_embedding(question)
    text_sentences = [sent.text for sent in text_doc.sents]
    text_embeddings = get_embedding(text_sentences)

    # Finding the most relevant sentence in the text based on cosine similarity
    similarities = util.pytorch_cos_sim(question_embedding, text_embeddings).squeeze()
    most_relevant_idx = np.argmax(similarities)
    most_relevant_sentence = text_sentences[most_relevant_idx]

    # Extracting semantic roles from the most relevant sentence
    text_roles = semantic_roles(most_relevant_sentence)
    print("Semantic role labeling of relevant sentence: ",text_roles)

    # Initialize the answer
    answer = ""

    # Analyze the question to determine the type of information being asked for
    if any(word in question.lower() for word in ['who', 'whose']):
        if 'Agent' in text_roles:
            answer = text_roles['Agent']
    elif any(word in question.lower() for word in ['how', 'with what']):
        if 'Instrument' in text_roles:
            answer = text_roles['Instrument']
    elif any(word in question.lower() for word in ['what', 'which']):
        # in case of ambiguity, predict multiple answers
        if 'Patient' in text_roles:
            answer = text_roles['Patient']
            answer += ", "
        if 'Predicate' in text_roles:
            answer += text_roles['Predicate']
    elif any(word in question.lower() for word in ['whom']):
        if 'Patient' in text_roles:
            answer = text_roles['Patient']
    elif any(word in question.lower() for word in ['where']):
        if 'Destination' in text_roles:
            answer = text_roles['Destination']
            answer += ", "
        if 'Patient' in text_roles:
            answer += text_roles['Patient']
    elif any(word in question.lower() for word in ['how']):
        if 'Predicate' in text_roles:
            answer = text_roles['Predicate']
    elif any(word in question.lower() for word in ['when']):
        if 'Beneficiary' in text_roles:
            answer = text_roles['Beneficiary']
            answer += ", "
        if 'Patient' in text_roles:
            answer += text_roles['Patient']

    return answer

### Example usages

In [42]:
# Example usage
paragraph = """John was the one behind the mass protest held recently in Mumbai. John sold a car to Mary in the Bangalore. He is very good at hypnotizing people."""
question = "Where did John sell the car?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'John', 'Patient': 'a car', 'Destination': 'the Bangalore', 'Predicate': 'sell'}
Answer:  the Bangalore, a car


In [43]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing."""
question = "What did Alice do?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'Alice', 'Patient': 'Tuesday afternoon', 'Predicate': 'visit'}
Answer:  Tuesday afternoon, visit


In [44]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar during free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing."""
question = "Who enjoys playing guitar?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'Bob', 'Patient': 'the guitar', 'Predicate': 'enjoy'}
Answer:  Bob


In [45]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing."""
question = "What did Sarah buy for the upcoming party?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'Sarah', 'Patient': 'a new dress', 'Beneficiary': 'the upcoming party', 'Predicate': 'buy'}
Answer:  a new dress, buy


In [46]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing with her laptop."""
question = "Who serves the best coffee in the city?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'The new cafe downtown', 'Patient': 'the best coffee', 'Predicate': 'serve', 'Destination': 'the city'}
Answer:  The new cafe downtown


In [47]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing with her laptop."""
question = "How is Jenny taking her course?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'Jenny', 'Patient': 'digital marketing', 'Instrument': 'her laptop', 'Predicate': 'take'}
Answer:  her laptop


In [48]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing with her laptop."""
question = "Who serves the best coffee in the city?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'The new cafe downtown', 'Patient': 'the best coffee', 'Predicate': 'serve', 'Destination': 'the city'}
Answer:  The new cafe downtown


In [49]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing with her laptop."""
question = "When is rain predicted?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'The weather forecast', 'Patient': 'rain', 'Beneficiary': 'the weekend', 'Predicate': 'predict'}
Answer:  the weekend, rain


In [50]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing with her laptop."""
question = "When did Alice visit museum?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'Alice', 'Patient': 'Tuesday afternoon', 'Predicate': 'visit'}
Answer:  Tuesday afternoon


In [51]:
# Example usage
paragraph = """Alice visited the museum on Tuesday afternoon. The stock market experienced a significant drop last week. Bob enjoys playing the guitar in his free time. The weather forecast predicts rain for the weekend. Sarah bought a new dress for the upcoming party. The new cafe downtown serves the best coffee in the city. Mike is planning a hiking trip next month. The movie was released to critical acclaim. The cat slept peacefully on the windowsill. Jenny is taking a course on digital marketing with her laptop."""
question = "Where was cat sleeping?"
print("Answer: ", answer_question(question, paragraph))

Semantic role labeling of relevant sentence:  {'Agent': 'The cat', 'Predicate': 'sleep', 'Patient': 'the windowsill'}
Answer:  the windowsill


### Evaluation metrics

In [28]:
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

In [29]:
def evaluate_qa_system_with_cosine_similarity(y_pred, ground_truth_answers):
    """
    Evaluate the QA system using cosine similarity.

    Args:
    - y_pred (list of str): List of preddicted answers.
    - ground_truth_answers (list of str): List of ground truth answers.

    Returns:
    - float: Average cosine similarity score.
    """

    # initializations
    embeddings_true = []
    embeddings_pred = []
    cosine_similarities = []

    # computing cosine similarity for each predicted output with ground truth
    for yt, yp, i in zip(ground_truth_answers, y_pred, range(len(y_pred))):
        embeddings_true.append(sentence_model.encode(yt[i]))
        embeddings_pred.append(sentence_model.encode(yp[i]))
        cosine_similarities.append(cosine_similarity(embeddings_true[i], embeddings_pred[i]))
    print("Cosine similarity: ",cosine_similarities)

    # computing average cosine similarity for given samples
    average_cosine_similarity = np.mean([cosine_similarities[i] for i in range(len(y_pred))])

    return average_cosine_similarity

In [30]:
# Example usage
paragraphs = [paragraph]
questions = ["What did Alice do?", "Who enjoys playing guitar?", "What did Sarah buy for the upcoming party?", "How is Jenny taking her course?", "Who serves the best coffee in the city?", "When is rain predicted?","When did Alice visit museum?", "Where was cat sleeping?" ]
ground_truth_answers = ["visit museum", "Bob", "a new dress", "through her laptop", "the new cafe", "the weekend", "tuesday afternoon", "in the windowsill"]
y_pred = ["Tuesday afternoon, visit", "Bob","a new dress, buy","her laptop", "The new cafe downtown", "the weekend, rain", "Tuesday afternoon", "the windowsill"]
metrics = evaluate_qa_system_with_cosine_similarity(y_pred, ground_truth_answers)
print("Average cosine similarity achieved: ", metrics)

Cosine similarity:  [0.40277010202407837, 0.9999999403953552, 1.000000238418579, 0.575816810131073, 1.000000238418579, 1.0000001192092896, 1.0, 0.5372871160507202]
Average cosine similarity achieved:  0.8144843205809593
