In [1]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Load data and preprocess

In [2]:
file_path = "data/Reviews.csv" 

df = pd.read_csv(file_path)
df = df.dropna(subset=['Text'])
df = df.head(1000).reset_index(drop=True)

process with SpaCy

In [3]:
nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])

def preprocess_text(text):
    doc = nlp(text.lower())  # convert to lowercase and process with spaCy
    tokens = [
        token.lemma_ for token in doc
        if token.is_alpha and not token.is_stop
    ]
    return " ".join(tokens)

In [5]:
df['cleaned_text'] = df['Text'].apply(preprocess_text)

TF-IDF the text

In [6]:
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df['cleaned_text'])

Processing querys (from user)

In [None]:
def process_query(query):
    return preprocess_text(query)

def retrieve_reviews(user_query, top_k=5):
    clean_query = process_query(user_query)
    query_vector = vectorizer.transform([clean_query])

    
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()

    top_indices = similarities.argsort()[::-1][:top_k]
    results = df.iloc[top_indices][['Text', 'Score']]
    results['similarity'] = similarities[top_indices]

    return results

SAMPLE query

In [8]:
user_query = input("Enter your query: ")

print("\nTop matching reviews:\n")
results = retrieve_reviews(user_query, top_k=5)

for idx, row in results.iterrows():
    print(f"⭐ Score: {row['Score']} | Similarity: {row['similarity']:.3f}")
    print(row['Text'])
    print("-" * 80)


Top matching reviews:

⭐ Score: 5 | Similarity: 0.430
My daughter is outgrowing baby food but will still eat this.  We tried 3rd foods on her and she doesn't like the texture of them.  This is really good for her and she likes it, so we keep buying it.
--------------------------------------------------------------------------------
⭐ Score: 5 | Similarity: 0.389
From 6 month to 12 month, my son barely ate any solid food.  He was so picky.  We tried many different flavor from various brands and this was the only baby food that he'd actually finish the whole jar.  I didn't expect him to like this since it has very green vegetable scent to it, but I guessed it wrong.<br />Now he is 18 month old and he is a pretty healthy eater.  His current favorite seems to be tofu, but he still loves corn, zucchini, brown rice and carrot which are the ingredients of this.  I highly recommend to try this baby food.<br /><br />Also, Earth's Best makes not only the best baby foods but their toddler foods 