In [10]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
import pandas as pd

file_path = "/content/drive/MyDrive/Colab Notebooks/AI - BootCamp -  Assignment - CCE/product_reviews.csv"

for enc in ["utf-8", "latin1", "cp1252", "ISO-8859-1"]:
    try:
        df = pd.read_csv(file_path, encoding=enc)
        print(f"Loaded with encoding: {enc}")
        break
    except UnicodeDecodeError:
        continue

print(df.head(3))
print(df.columns.tolist())


Loaded with encoding: latin1
   Review_ID                                        Review_Text
0          1  "The product is GREAT! Loved it, but its a bi...
1          2  "Worst product ever!! Wouldnt recommend to an...
2          3  "Satisfactory quality, works as expected, no m...
['Review_ID', 'Review_Text']


In [12]:
candidates = [c for c in df.columns if c.lower() in {
    "review","reviews","review_text","text","comment","comments","content","body","feedback","reviewtext"
}]
if candidates:
    REVIEW_COL = candidates[0]
else:
    REVIEW_COL = df.select_dtypes(include=["object"]).columns[0]

print("Using review column:", REVIEW_COL)


Using review column: Review_Text


In [13]:
import re

df[REVIEW_COL] = df[REVIEW_COL].astype(str).fillna("")
df["lower"] = df[REVIEW_COL].str.lower()


In [14]:
def clean_text(s):
    s = re.sub(r"[^a-z\s]", " ", s)
    s = re.sub(r"\s+", " ", s).strip()
    return s

df["clean"] = df["lower"].apply(clean_text)
df[["lower","clean"]].head(3)


Unnamed: 0,lower,clean
0,"""the product is great! loved it, but its a bi...",the product is great loved it but it s a bit p...
1,"""worst product ever!! wouldnt recommend to an...",worst product ever wouldn t recommend to anyone
2,"""satisfactory quality, works as expected, no m...",satisfactory quality works as expected no majo...


In [17]:
import nltk
nltk.download("punkt")
nltk.download("stopwords")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [18]:
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

stop_words = set(stopwords.words("english"))

df["tokens"] = df["clean"].apply(word_tokenize)
df["tokens_nostop"] = df["tokens"].apply(lambda toks: [w for w in toks if w not in stop_words])

df[["tokens","tokens_nostop"]].head(3)


Unnamed: 0,tokens,tokens_nostop
0,"[the, product, is, great, loved, it, but, it, ...","[product, great, loved, bit, pricey]"
1,"[worst, product, ever, wouldn, t, recommend, t...","[worst, product, ever, recommend, anyone]"
2,"[satisfactory, quality, works, as, expected, n...","[satisfactory, quality, works, expected, major..."


In [19]:
nltk.download("wordnet")
nltk.download("omw-1.4")

from nltk.stem import PorterStemmer, WordNetLemmatizer
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


In [20]:
df["stemmed"] = df["tokens_nostop"].apply(lambda toks: [stemmer.stem(w) for w in toks])
df["lemmatized"] = df["tokens_nostop"].apply(lambda toks: [lemmatizer.lemmatize(w) for w in toks])
df


Unnamed: 0,Review_ID,Review_Text,lower,clean,tokens,tokens_nostop,stemmed,lemmatized
0,1,"""The product is GREAT! Loved it, but its a bi...","""the product is great! loved it, but its a bi...",the product is great loved it but it s a bit p...,"[the, product, is, great, loved, it, but, it, ...","[product, great, loved, bit, pricey]","[product, great, love, bit, pricey]","[product, great, loved, bit, pricey]"
1,2,"""Worst product ever!! Wouldnt recommend to an...","""worst product ever!! wouldnt recommend to an...",worst product ever wouldn t recommend to anyone,"[worst, product, ever, wouldn, t, recommend, t...","[worst, product, ever, recommend, anyone]","[worst, product, ever, recommend, anyon]","[worst, product, ever, recommend, anyone]"
2,3,"""Satisfactory quality, works as expected, no m...","""satisfactory quality, works as expected, no m...",satisfactory quality works as expected no majo...,"[satisfactory, quality, works, as, expected, n...","[satisfactory, quality, works, expected, major...","[satisfactori, qualiti, work, expect, major, i...","[satisfactory, quality, work, expected, major,..."
3,4,"""Amazing product, I would buy it again and aga...","""amazing product, i would buy it again and aga...",amazing product i would buy it again and again,"[amazing, product, i, would, buy, it, again, a...","[amazing, product, would, buy]","[amaz, product, would, buy]","[amazing, product, would, buy]"
4,5,"""The delivery was slow, but the product is good.""","""the delivery was slow, but the product is good.""",the delivery was slow but the product is good,"[the, delivery, was, slow, but, the, product, ...","[delivery, slow, product, good]","[deliveri, slow, product, good]","[delivery, slow, product, good]"
5,6,"""Horrible experience, the product broke after ...","""horrible experience, the product broke after ...",horrible experience the product broke after ju...,"[horrible, experience, the, product, broke, af...","[horrible, experience, product, broke, one, use]","[horribl, experi, product, broke, one, use]","[horrible, experience, product, broke, one, use]"
6,7,"""Great value for the price! Definitely worth b...","""great value for the price! definitely worth b...",great value for the price definitely worth buying,"[great, value, for, the, price, definitely, wo...","[great, value, price, definitely, worth, buying]","[great, valu, price, definit, worth, buy]","[great, value, price, definitely, worth, buying]"
7,8,"""The product didnt meet my expectations, retu...","""the product didnt meet my expectations, retu...",the product didn t meet my expectations return...,"[the, product, didn, t, meet, my, expectations...","[product, meet, expectations, returning]","[product, meet, expect, return]","[product, meet, expectation, returning]"
8,9,"""Im satisfied with the purchase, but there ar...","""im satisfied with the purchase, but there ar...",i m satisfied with the purchase but there are ...,"[i, m, satisfied, with, the, purchase, but, th...","[satisfied, purchase, better, options, available]","[satisfi, purchas, better, option, avail]","[satisfied, purchase, better, option, available]"
9,10,"""Superb product! Excellent build quality and g...","""superb product! excellent build quality and g...",superb product excellent build quality and gre...,"[superb, product, excellent, build, quality, a...","[superb, product, excellent, build, quality, g...","[superb, product, excel, build, qualiti, great...","[superb, product, excellent, build, quality, g..."
