# Step 1 & 2: Install and Import Required Libraries

In [1]:
import pandas as pd
import spacy
import string
import nltk
from nltk.corpus import stopwords



nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# Step 3: Load spaCy model

In [2]:
nlp = spacy.load("en_core_web_sm")

# Step 4: Load the Dataset

In [4]:
df = pd.read_csv("Reviews.csv", on_bad_lines = 'skip', engine = 'python')


reviews = df['Text'].dropna()
reviews = reviews.dropna()
reviews = reviews[:10000]

# Step 5: Preprocessing

In [5]:
def preprocess(text):
    text = text.lower()
    text = text.translate(str.maketrans("", "", string.punctuation))
    return text

reviews = reviews.apply(preprocess)

# Step 6: Tokenization and stopword removal

In [6]:
def tokenize(text):
    tokens = [word for word in text.split() if word.isalpha() and word not in stop_words]
    return tokens

tokenized_reviews = reviews.apply(tokenize)

# Step 7 & 8: POS tagging and NER with spaCy

In [7]:
for i, review in enumerate(reviews[:5]):
    print(f"\n--- Review {i+1} ---")
    doc = nlp(review)

    print("POS Tags:")
    for token in doc:
        print(f"{token.text} -> {token.pos_}")

    print("\nNamed Entities:")
    for ent in doc.ents:
        print(f"{ent.text} -> {ent.label_}")


--- Review 1 ---
POS Tags:
i -> PRON
have -> AUX
bought -> VERB
several -> ADJ
of -> ADP
the -> DET
vitality -> NOUN
canned -> VERB
dog -> NOUN
food -> NOUN
products -> NOUN
and -> CCONJ
have -> AUX
found -> VERB
them -> PRON
all -> PRON
to -> PART
be -> AUX
of -> ADP
good -> ADJ
quality -> NOUN
the -> DET
product -> NOUN
looks -> VERB
more -> ADV
like -> ADP
a -> DET
stew -> NOUN
than -> ADP
a -> DET
processed -> VERB
meat -> NOUN
and -> CCONJ
it -> PRON
smells -> VERB
better -> ADJ
my -> PRON
labrador -> NOUN
is -> AUX
finicky -> ADJ
and -> CCONJ
she -> PRON
appreciates -> VERB
this -> DET
product -> NOUN
better -> ADV
than -> ADP
  -> SPACE
most -> ADV

Named Entities:

--- Review 2 ---
POS Tags:
product -> NOUN
arrived -> AUX
labeled -> VERB
as -> ADP
jumbo -> NOUN
salted -> VERB
peanutsthe -> ADJ
peanuts -> NOUN
were -> AUX
actually -> ADV
small -> ADJ
sized -> ADJ
unsalted -> VERB
not -> PART
sure -> ADJ
if -> SCONJ
this -> PRON
was -> AUX
an -> DET
error -> NOUN
or -> CCONJ
if 