In [10]:
!pip install spacy pandas scikit-learn
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [11]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [12]:
df = pd.read_csv("Reviews.csv")
df = df.dropna(subset=["Text"]).head(1000)



In [13]:
nlp = spacy.load("en_core_web_sm")

def preprocess(text):
    doc = nlp(text.lower())
    tokens = [token.lemma_ for token in doc
              if token.is_alpha and not token.is_stop]
    return " ".join(tokens)

df["cleaned"] = df["Text"].apply(preprocess)
print(df[["Text","cleaned"]].head(2))



                                                Text  \
0  I have bought several of the Vitality canned d...   
1  Product arrived labeled as Jumbo Salted Peanut...   

                                             cleaned  
0  buy vitality can dog food product find good qu...  
1  product arrived label jumbo salt peanut peanut...  


In [14]:
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df["cleaned"])



In [15]:
query = "Great taste and fast delivery"
query_clean = preprocess(query)
query_vec = vectorizer.transform([query_clean])



In [16]:
# cosine similarity
sims = cosine_similarity(query_vec, tfidf_matrix).flatten()

# top 5 matches
top_k = sims.argsort()[-5:][::-1]
for i in top_k:
    print(f"Score: {sims[i]:.4f} | Review: {df.iloc[i]['Text'][:200]}...")


Score: 0.5706 | Review: This was the best deal ever. The delivery was fast, the chips are delicious and very fresh.  Love it:)...
Score: 0.5025 | Review: This is a good quality brand of chocolate with many flavours of which this is just one. Very tasty and a fast delivery too...
Score: 0.3499 | Review: We had trouble finding this locally - delivery was fast, no more hunting up and down the flour aisle at our local grocery stores....
Score: 0.3209 | Review: this has to be one of the best teas I have ever tasted... it's clean, bright, fresh...<br /><br />great delivery...again quality... just try it......
Score: 0.2535 | Review: These where great couldnt stop eating them.<br />Lowest priced anywhere and will buy even more next time.<br />Cant say enough about these, they where shipped faster than what I paid to have shipped a...
