In [1]:
# importing required libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [19]:
# Read the tab-separated restaurant reviews file into a DataFrame.

REVIEWS_PATH = "/content/drive/MyDrive/#100DaysofMLcode/Restaurant_Reviews.tsv"
data = pd.read_csv(REVIEWS_PATH, delimiter="\t")

In [20]:
# Remove the "Liked" column. The frame is rebound rather than mutated in
# place, and a missing column is ignored, so re-running this cell after the
# column is already gone no longer raises KeyError.
data = data.drop(columns=["Liked"], errors="ignore")

In [13]:
# Preview the first 20 rows of the frame.
data.head(n=20)

Unnamed: 0,Review
0,Wow... Loved this place.
1,Crust is not good.
2,Not tasty and the texture was just nasty.
3,Stopped by during the late May bank holiday of...
4,The selection on the menu was great and so wer...
5,Now I am getting angry and I want my damn pho.
6,Honeslty it didn't taste THAT fresh.)
7,The potatoes were like rubber and you could te...
8,The fries were great too.
9,A great touch.


In [25]:
# Removing special characters

import re

# Vectorised regex replacement over the "Review" column. Unlike mapping
# re.sub over each value, a missing review (NaN) passes through unchanged
# instead of raising TypeError.
# NOTE: matched characters are deleted, not replaced by a space, so text such
# as "food/service" becomes "foodservice". Apostrophes are not in the
# character class and are therefore kept.
data["Text"] = data["Review"].str.replace(r'[-()\"#/@;:<>{}`+=~|.!?,]', '', regex=True)

In [27]:
# Store a lowercase copy of the "Text" column in "Lower".

text_lowercased = data["Text"].str.lower()
data["Lower"] = text_lowercased

In [29]:
# Expand contractions

import re

def decontracted(phrase):
    """Expand common English contractions in ``phrase``.

    The specific forms "won't" and "can't" are rewritten first, followed by
    the generic suffixes n't, 're, 's, 'd, 'll, 't, 've and 'm, in that order.
    Both the straight apostrophe (') and the typographic apostrophe (U+2019)
    are recognised. Matching is case-sensitive and is not anchored to word
    boundaries.

    Args:
        phrase: Text to process.

    Returns:
        The text with every matched contraction replaced by its expansion.
    """
    # Character class matching either apostrophe form.
    apostrophe = r"['\u2019]"

    # Order matters: the specific forms must run before the generic "n't"
    # rule, and "n't" must run before the bare "'t" rule.
    rules = (
        # specific
        ("won" + apostrophe + "t", "will not"),
        ("can" + apostrophe + "t", "can not"),
        # general
        ("n" + apostrophe + "t", " not"),
        (apostrophe + "re", " are"),
        (apostrophe + "s", " is"),  # also rewrites possessives ("chef's")
        (apostrophe + "d", " would"),
        (apostrophe + "ll", " will"),
        (apostrophe + "t", " not"),
        (apostrophe + "ve", " have"),
        (apostrophe + "m", " am"),
    )
    for pattern, expansion in rules:
        phrase = re.sub(pattern, expansion, phrase)
    return phrase

# Run decontracted over every value of the "Lower" column.
data["Expand_contractions"] = data["Lower"].map(decontracted)

In [37]:
# Removing stopwords

import re
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

stop = stopwords.words('english')

# Membership tests against a list scan it element by element; build a set
# once so each token lookup is constant time. `stop` itself stays a list.
stop_lookup = frozenset(stop)

# Split each review on whitespace, drop tokens found in the stopword set and
# re-join the remainder with single spaces.
# NOTE(review): the removed words include "is" and "not"
# ("crust is not good" -> "crust good"), which discards the negation that the
# contraction expansion produced -- confirm this is intended.
data['stop_words_removed'] = data['Expand_contractions'].apply(
    lambda x: ' '.join(word for word in x.split() if word not in stop_lookup)
)

In [None]:
# Tokenizing the text

import nltk
nltk.download('punkt')
# Recent NLTK releases (3.9+) load the Punkt models from the 'punkt_tab'
# resource; without it word_tokenize raises LookupError. Fetch both so the
# cell works across NLTK versions.
nltk.download('punkt_tab')

from nltk.tokenize import word_tokenize

# Turn each stopword-free review string into a list of tokens.
data["Tokenized"] = data["stop_words_removed"].apply(word_tokenize)

In [46]:
# Stemming: reduce every token to its stem.
# Lemmatization, by comparison, gives more meaningful root words than stemming.

from nltk.stem.snowball import SnowballStemmer

# English Snowball stemmer.
stemmer = SnowballStemmer("english")


def _stem_tokens(tokens):
    """Return the stems of ``tokens`` as a new list, in the same order."""
    return list(map(stemmer.stem, tokens))


data['stemmed'] = data['Tokenized'].apply(_stem_tokens)

In [55]:
# Preview the first 20 rows of the frame.
data.head(n=20)

Unnamed: 0,Review,Text,Lower,Expand_contractions,stop_words_removed,Tokenized,stemmed
0,Wow... Loved this place.,Wow Loved this place,wow loved this place,wow loved this place,wow loved place,"[wow, loved, place]","[wow, love, place]"
1,Crust is not good.,Crust is not good,crust is not good,crust is not good,crust good,"[crust, good]","[crust, good]"
2,Not tasty and the texture was just nasty.,Not tasty and the texture was just nasty,not tasty and the texture was just nasty,not tasty and the texture was just nasty,tasty texture nasty,"[tasty, texture, nasty]","[tasti, textur, nasti]"
3,Stopped by during the late May bank holiday of...,Stopped by during the late May bank holiday of...,stopped by during the late may bank holiday of...,stopped by during the late may bank holiday of...,stopped late may bank holiday rick steve recom...,"[stopped, late, may, bank, holiday, rick, stev...","[stop, late, may, bank, holiday, rick, steve, ..."
4,The selection on the menu was great and so wer...,The selection on the menu was great and so wer...,the selection on the menu was great and so wer...,the selection on the menu was great and so wer...,selection menu great prices,"[selection, menu, great, prices]","[select, menu, great, price]"
5,Now I am getting angry and I want my damn pho.,Now I am getting angry and I want my damn pho,now i am getting angry and i want my damn pho,now i am getting angry and i want my damn pho,getting angry want damn pho,"[getting, angry, want, damn, pho]","[get, angri, want, damn, pho]"
6,Honeslty it didn't taste THAT fresh.),Honeslty it didn't taste THAT fresh,honeslty it didn't taste that fresh,honeslty it did not taste that fresh,honeslty taste fresh,"[honeslty, taste, fresh]","[honeslti, tast, fresh]"
7,The potatoes were like rubber and you could te...,The potatoes were like rubber and you could te...,the potatoes were like rubber and you could te...,the potatoes were like rubber and you could te...,potatoes like rubber could tell made ahead tim...,"[potatoes, like, rubber, could, tell, made, ah...","[potato, like, rubber, could, tell, made, ahea..."
8,The fries were great too.,The fries were great too,the fries were great too,the fries were great too,fries great,"[fries, great]","[fri, great]"
9,A great touch.,A great touch,a great touch,a great touch,great touch,"[great, touch]","[great, touch]"
