In [4]:
# pip install spacy
#!python -m spacy download en_core_web_md
import nltk
import spacy
from nltk.corpus import wordnet
# wordnet.synsets() reads the 'wordnet' corpus itself; 'omw-1.4' is a separate
# companion package, so fetch both to keep the notebook runnable on a fresh
# environment. Each download is skipped by NLTK when the package is already up to date.
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package omw-1.4 to C:\Users\Asif
[nltk_data]     Sayyed\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [5]:
# Load the pretrained spaCy pipeline once at notebook level; calculate_similarity
# reuses this NLP object every time it needs to parse a sentence.
# Requires the "en_core_web_md" package (see the download command in the first cell).
NLP = spacy.load("en_core_web_md")

In [17]:
def _first_synset(text: str):
    """Return the first synset WordNet lists for ``text``, or ``None`` if it has none."""
    candidates = wordnet.synsets(text)
    return candidates[0] if candidates else None


def calculate_similarity(input1: str, input2: str) -> float:
    """
    Calculate the semantic similarity between two input strings.

    The strategy depends on how many whitespace-separated words each input has:

    * word vs. word: Wu-Palmer similarity between the first WordNet synset of
      each word.
    * sentence vs. sentence: spaCy ``Doc.similarity`` of the two parsed texts,
      clamped to the documented range.
    * word vs. sentence: the highest Wu-Palmer similarity between the word's
      first synset and the first synset of any token in the sentence.

    Parameters:
    input1 (str): The first input string.
    input2 (str): The second input string.

    Returns:
    float: The semantic similarity score between the input strings.
           The score ranges from 0.0 (no similarity) to 1.0 (high similarity).
           0.0 is also returned when either input is empty or whitespace-only,
           or when WordNet has no synset for a word that needs one.
    """
    # Split inputs into words to check their type
    input1_words = input1.split()
    input2_words = input2.split()

    # Nothing to compare. Without this guard an empty input paired with a
    # single word would be treated as "word vs. sentence" with the same word on
    # both sides and score a spurious 1.0.
    if not input1_words or not input2_words:
        return 0.0

    input1_is_word = len(input1_words) == 1
    input2_is_word = len(input2_words) == 1

    if input1_is_word and input2_is_word:
        # Both inputs are single words. Use WordNet for comparison.
        # Look up the split token rather than the raw string so surrounding
        # whitespace cannot make the WordNet lookup miss.
        synset1 = _first_synset(input1_words[0])
        synset2 = _first_synset(input2_words[0])
        if synset1 is None or synset2 is None:
            return 0.0  # Return 0.0 if no synsets are found
        similarity = synset1.wup_similarity(synset2)
        return float(similarity) if similarity is not None else 0.0

    if not input1_is_word and not input2_is_word:
        # Both inputs are sentences. Use spaCy for similarity.
        score = float(NLP(input1).similarity(NLP(input2)))
        # The raw score can fall outside [0, 1] (e.g. negative cosine values),
        # so clamp it to the range promised in the docstring.
        return min(1.0, max(0.0, score))

    # One input is a single word and the other is a sentence.
    if input1_is_word:
        word, sentence = input1_words[0], input2
    else:
        word, sentence = input2_words[0], input1

    word_synset = _first_synset(word)
    if word_synset is None:
        return 0.0  # Return 0.0 if no synsets are found for the word

    max_similarity = 0.0
    for token in NLP(sentence):
        token_synset = _first_synset(token.text)
        if token_synset is None:
            continue  # tokens without a WordNet entry cannot be scored
        similarity = word_synset.wup_similarity(token_synset)
        if similarity is not None and similarity > max_similarity:
            max_similarity = similarity

    return float(max_similarity)

In [21]:
# Both arguments are single words, so this call exercises the WordNet branch.
calculate_similarity("slim", "shady")

0.3333333333333333

In [22]:
# Both arguments contain several words, so this call exercises the spaCy branch.
calculate_similarity("Eminem is the real Slim Shady", "The real Slim Shady just stands up, so everyone should pay attention!")

0.5172432380994461

In [23]:
# One single word and one sentence: the word is compared against every token of the sentence.
calculate_similarity("rap", "Eminem once rapped so fast even the mic got dizzy")

0.25