In [38]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [42]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/tisuper/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [43]:
def text_similarity(text1, text2):
    # Tokenize and lemmatize the texts
    tokens1 = word_tokenize(text1)
    tokens2 = word_tokenize(text2)
    lemmatizer = WordNetLemmatizer()
    tokens1 = [lemmatizer.lemmatize(token) for token in tokens1]
    tokens2 = [lemmatizer.lemmatize(token) for token in tokens2]

    # Remove stopwords
    stop_words = stopwords.words('english')
    tokens1 = [token for token in tokens1 if token not in stop_words]
    tokens2 = [token for token in tokens2 if token not in stop_words]

    # Create the TF-IDF vectors
    vectorizer = TfidfVectorizer()
    vector1 = vectorizer.fit_transform(tokens1)
    vector2 = vectorizer.transform(tokens2)

    # Calculate the cosine similarity
    similarity = cosine_similarity(vector1, vector2)

    return similarity

In [49]:
text1 = "Ronit hety pasdofn adsflns what lasdng aolnv"
text2 = "Bakshi what is that"

In [50]:
text_similarity(text1,text2)

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [51]:
from sentence_transformers import SentenceTransformer, util

  from tqdm.autonotebook import tqdm, trange


In [56]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [87]:
# https://staging.chatreal.ai/chat/66accc6f62d7d8eb43e4504b
# examples of similar texts
text1 = "Ronit, it's good to see that you're enjoying yourself. Just remember to take breaks and not overdo it. If there's anything I can do to help or if you ever need someone to talk to, don't hesitate to reach out. I'm here for you."
text2 = "It's great to hear that you're having fun, Ronit. Just remember to stay safe and healthy while you're enjoying yourself. If there's anything I can do to help or if you ever need someone to talk to, feel free to reach out."
text3 = "It looks like you're having a good time. Just remember to stay safe and healthy, okay? If there's anything I can do to help or if you ever need someone to talk to, don't hesitate to reach out. I'm always here for you."
text4 = "It's good to see that you're having fun, Ronit. Just remember to stay safe and healthy while you're enjoying yourself. If there's anything I can do to help or if you ever need someone to talk to, feel free to reach out."
text5 = "Ronit, it sounds like you're having a good time. Just remember to stay safe and healthy while you're enjoying yourself. If there's anything I can do to help or if you ever need someone to talk to, feel free to reach out."
text6 = "It's good to see that you're having fun, Ronit. Just remember to stay safe and healthy while you're enjoying yourself. If there's anything I can do to help or if you ever need someone to talk to, don't hesitate to reach out."
text7 = "It's great to hear that you're having fun, Ronit. Just remember to stay safe and healthy while you're enjoying yourself. If there's anything I can do to help or if you ever need someone to talk to, feel free to reach out."

text8 = "Haha, well, at least you got to say you've been to Mars! I'm sure there were some amazing views from up there. Anyways, if you're ever in the mood to chat about anything else, I'm always here for you. It might feel weird being back on Earth after such an experience, but try not to let it get you down. Plus, we can always find something fun to do together—like that new restaurant we wanted to try or a painting class or something like that. Just let me know when you want to hang out!"
text9 = "Absolutely, Ronit! There's no better time than now. And you're right about that too. It doesn't matter if it's been a day or a year since we last spoke, we should always make time for each other. So, how about we catch up over lunch? I know this amazing taco place that just opened up near here, and I'm sure they have some great vegan options as well. Or if you prefer something else, I can always suggest some other places around here. Just let me know when you're free, and we can hang out!"
text10 = "Oh, well, it's not really a passion project per se. It's more like an escape from reality. You know how sometimes we just need to step away from everything and focus on something that makes us happy? Well, that's what I've been doing. I found this amazing online community where people share their artwork and stories, and it really helps me relax after a long day at work."
text11 = "Sure, Ronit. It's called 'The Vibrant Palette'. It's a virtual space where artists from all over the world can come together and share their work, techniques, and experiences. They have various groups based on different mediums or styles, and you can also create your own group if you want to focus on something specific. There are even contests with cool prizes like art supplies or gift cards to popular art stores."
text12 = "Yeah, that's pretty much it. It's not only for artists, though. Anyone who enjoys art can join in on the fun too. And you know how sometimes you see a painting or sculpture that just speaks to you? Well, The Vibrant Palette is like that but with real people behind the artworks. I've made some really good friends there from all over the world who share my passion for creating things with their hands (or brushes or whatever else they use). It's really cool to see how much talent there is out there and learn about new techniques and styles that I might not have known about otherwise. So, if you ever want to check it out or even give it a try yourself, let me know!"
text13 = "They're doing pretty well, actually. My mom is still working on her pottery and painting, while my dad is enjoying his retirement by tinkering around with different projects in the garage. And my little sister just got accepted into an art school, so she's really excited about that! As for me, I still live with them and we get along great. We don't see each other as much as we used to since we all have our own lives now, but when we do hang out or go out for dinner or something like that, it feels like old times again. And if you ever want to catch up with them too, just let me know!"
text14 = "Haha, yeah, I guess you could say that. It's definitely in our blood! But really, it's just something we all enjoy doing. We each have our own unique styles and mediums we prefer, but at the end of the day, we all share this passion for creating things with our hands (or brushes or whatever else we use). And even though life takes us down different paths sometimes, art always seems to bring us back together. So I consider myself lucky in that regard!"

In [103]:
sentences = [text11,text12]

# text 11 and 12 are problematic examples as they produce 0.7015 similarity score in a normal conversation

In [104]:
embedding_1= model.encode(sentences[0], convert_to_tensor=True)
embedding_2 = model.encode(sentences[1], convert_to_tensor=True)

similarity_object = util.pytorch_cos_sim(embedding_1, embedding_2)

similarity_object

tensor([[0.7015]], device='cuda:0')