In [None]:
!pip install sentence-transformers
!pip install sklearn

In [None]:
import nltk

# sent_tokenize needs the Punkt sentence-tokenizer data. Newer NLTK releases
# look it up under 'punkt_tab' while older ones use 'punkt', so fetch both;
# nltk.download reports (rather than raises) when a resource is unknown to the
# installed version.
for resource in ('punkt', 'punkt_tab'):
  nltk.download(resource)

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from nltk import sent_tokenize

from sentence_transformers import SentenceTransformer

In [26]:
def _mean_sentence_embedding(model, text):
  """Return one vector for `text`: the mean of its per-sentence embeddings."""
  # The model is fed sentence by sentence instead of the whole document,
  # because the original BERT is meant for shorter inputs.
  # If the tokenizer finds no sentence at all (e.g. empty text), encode the raw
  # text as a single item so the mean is not taken over an empty array (NaN).
  sentences = sent_tokenize(text) or [text]
  return np.mean(np.asarray(model.encode(sentences)), axis=0)


def process_bert_similarity(base_document, documents, model=None):
  """Print the document that is most similar to `base_document`.

  Every text is split into sentences, each sentence is embedded with the
  sentence-transformer model, and the sentence embeddings are averaged into a
  single vector per text. Documents are then ranked by cosine similarity to
  the base document's vector, and the best match and its score are printed.

  Args:
    base_document: Text that the candidates are compared against.
    documents: Non-empty, indexable sequence of candidate texts.
    model: Optional object exposing `encode(list_of_sentences)`. When omitted,
      the pretrained 'bert-base-nli-mean-tokens' model offered by UKPLab is
      downloaded and loaded. Pass an already loaded model to avoid reloading
      it on every call.

  Returns:
    None. Progress and the result are written to standard output.

  Raises:
    ValueError: If `documents` is empty.
  """
  # Fail fast, before the (expensive) model load, with a clear message.
  if len(documents) == 0:
    raise ValueError("documents must contain at least one document")

  if model is None:
    # This will download and load the pretrained model offered by UKPLab.
    model = SentenceTransformer('bert-base-nli-mean-tokens')

  base_embeddings = _mean_sentence_embedding(model, base_document)

  vectors = []
  for i, document in enumerate(documents):
    vectors.append(_mean_sentence_embedding(model, document))
    print("making vector at index:", i)

  scores = cosine_similarity([base_embeddings], vectors).flatten()

  # Cosine similarity can be negative, so take the true maximum instead of
  # searching upwards from 0 (which would report a made-up score of 0 for
  # documents[0] whenever no score is positive). Ties resolve to the first
  # document, as before.
  highest_score_index = int(np.argmax(scores))
  highest_score = scores[highest_score_index]

  most_similar_document = documents[highest_score_index]
  print("---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------\n")
  print(f"Most similar document: {most_similar_document} \n\n\nThe Score: {highest_score}")

In [29]:
# Reference text that every candidate document is compared against.
base_document = "Technologies are artifacts made through a systematic application of knowledge and used to reach practical goals"

# Candidate 1: a paragraph about sports and exercise.
sports_document = """Sports are defined as physical or mental exertion by individuals and are committed to maintaining physical or mental fitness. 
There are many types of exercise that can be practiced  as a healthy habit, such as walking, 
riding, Swimming, or athletics, and these sports have many benefits on the human body and mind.
Sport make the body ideal, consistent, and healthy, as it strengthens it, and fights obesity. 
Prevention of many diseases, including: Obesity, which is a cause of other diseases, such as: 
Diabetic cartilage disease, diabetes and high blood pressure."""

# Candidate 2: a paragraph about technology.
technology_document = """Scientific innovations or technology is the most amazing thing that can ever happen to humankind. 
In this modern world, technology is no more a luxury, and it has become a necessity. 
Life without technology is next to impossible but humans have become dependent on technological advancements. 
For every human activity, we need a machine, and then there comes technology. 
It has made all our lives easier and more convenient, making transportation, education, labour, etc., more accessible. 
Technological breakthroughs change from time to time in today’s fast-paced, ever-changing world. 
However, in today’s world, technology plays a critical role in strengthening a country’s economy and people’s lives."""

documents = [sports_document, technology_document]

# Prints the candidate closest to base_document together with its score.
process_bert_similarity(base_document, documents)

making vector at index: 0
making vector at index: 1
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Most similar document: Scientific innovations or technology is the most amazing thing that can ever happen to humankind. 
In this modern world, technology is no more a luxury, and it has become a necessity. 
Life without technology is next to impossible but humans have become dependent on technological advancements. 
For every human activity, we need a machine, and then there comes technology. 
It has made all our lives easier and more convenient, making transportation, education, labour, etc., more accessible. 
Technological breakthroughs change from time to time in today’s fast-paced, ever-changing world. 
However, in today’s world, technology plays a critical role in strengthening a country’s economy and people’s lives. 