In [5]:
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sunfa\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# reading data

In [6]:
# Load the labelled food reviews; the path is relative to the working directory.
data = pd.read_csv("NLP-food-review.csv")

In [7]:
# Preview the loaded frame; later cells read its "review" and "reaction" columns.
data

Unnamed: 0,review,reaction
0,Service is friendly and inviting.,1
1,Awesome service and food.,1
2,Waitress was a little slow in service.,0
3,"Come hungry, leave happy and stuffed!",1
4,Horrible - don't waste your time and money.,0
...,...,...
995,This was my first time and I can't wait until ...,1
996,Great service and food.,1
997,I paid the bill but did not tip because I felt...,0
998,The one down note is the ventilation could use...,0


In [9]:
# List NLTK's English stop words. The file id is lower-case ("english"), which
# matches the call inside preprocessing(); a capitalised id only resolves on
# case-insensitive filesystems.
stopwords.words("english")

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [None]:
"i am happy".split()

['i', 'am', 'happy']

In [None]:
" ".join(["i","hate","you"])

'i hate you'

In [None]:
# Shared Porter stemmer instance, used by preprocessing().
stemmer = PorterStemmer()

In [None]:
# Demo: the Porter stemmer reduces a word to its stem, which need not be a dictionary word.
stemmer.stem("service")

'servic'

In [None]:
def preprocessing(text):
    """Normalise one review into a space-separated string of stemmed tokens.

    Steps: lower-case the text; delete every character other than ``a``-``z``,
    ``:``, ``)`` and the space; split on whitespace; drop NLTK English stop
    words; Porter-stem the remaining tokens; re-join them with single spaces.

    Parameters
    ----------
    text : str
        Raw review text.

    Returns
    -------
    str
        The cleaned review; an empty string when no token survives.
    """
    # Load the stop-word list once per call (as a set for fast membership
    # tests) instead of calling stopwords.words() again for every token.
    stop_words = set(stopwords.words("english"))

    text = text.lower()
    # Digits and punctuation are deleted outright, so "don't" becomes "dont".
    text = re.sub('[^a-z:) ]', '', text)
    tokens = text.split()
    stems = [stemmer.stem(word) for word in tokens if word not in stop_words]
    return " ".join(stems)

In [None]:
# Sanity check: stop words ("i", "the") and digits are dropped and "loved" is stemmed.
preprocessing(" i loved the pizza 3000")

'love pizza'

In [None]:
# Clean every review with preprocessing().
# NOTE: this overwrites the raw text in place, so re-running the cell would
# preprocess already-cleaned strings; reload the CSV first if a re-run is needed.
data["review"] = data["review"].apply(preprocessing)

In [None]:
# Preview the frame again: "review" now holds the cleaned, stemmed text.
data

Unnamed: 0,review,reaction
0,servic friendli invit,1
1,awesom servic food,1
2,waitress littl slow servic,0
3,come hungri leav happi stuf,1
4,horribl dont wast time money,0
...,...,...
995,first time cant wait next,1
996,great servic food,1
997,paid bill tip felt server terribl job,0
998,one note ventil could use upgrad,0


# train and test split

In [None]:
# Hold out 20% of the reviews for evaluation; the fixed random_state keeps the
# split the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    data["review"],
    data["reaction"],
    test_size=0.20,
    random_state=20,
)

# CountVectorizer

In [None]:
# train
# se1: "I love the movie"
# se2: "I hate the movie"
# se3: "vijay was very good in the movie"


#        I  love  the  movie  hate  vijay  was  very  good  in
# se1:   1  1     1    1      0     0      0    0     0     0
# se2:   1  0     1    1      1     0      0    0     0     0
# se3:   0  0     1    1      0     1      1    1     1     1
# test:  0  0     0    1      0     0      0    0     1     0

# test

# "movie is good and nice"  (only "movie" and "good" are in the training vocabulary)

In [None]:
# Learn the vocabulary from the training reviews only, then encode both splits
# with it as dense count arrays.
# NOTE: x_train / x_test are rebound from text to arrays here, so this cell
# must be preceded by the train/test split cell whenever it is re-run.
c = CountVectorizer()

x_train = c.fit_transform(x_train).toarray()
x_test = c.transform(x_test).toarray()

In [None]:
# Inspect the encoded training matrix (one row per review, one column per vocabulary term).
x_train


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [None]:
# View the count matrix with the learned vocabulary as column labels.
# The pandas class is spelled `DataFrame`; `pd.Dataframe` raises AttributeError.
pd.DataFrame(x_train, columns=c.get_feature_names_out())

# model

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Fit each candidate classifier on the count vectors and print its metrics on
# the held-out split. After the loop, `model` stays bound to the last entry
# (LogisticRegression), which the inference cell relies on.
models = [
    DecisionTreeClassifier(),
    LogisticRegression(),
]

for model in models:
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    print(model)
    print(classification_report(y_test, pred))
    print("=" * 50)

DecisionTreeClassifier()
              precision    recall  f1-score   support

           0       0.68      0.75      0.71        96
           1       0.74      0.67      0.71       104

    accuracy                           0.71       200
   macro avg       0.71      0.71      0.71       200
weighted avg       0.71      0.71      0.71       200

LogisticRegression()
              precision    recall  f1-score   support

           0       0.77      0.83      0.80        96
           1       0.83      0.77      0.80       104

    accuracy                           0.80       200
   macro avg       0.80      0.80      0.80       200
weighted avg       0.80      0.80      0.80       200



# inference

In [None]:
text = "i love to eat that food"

# Clean the sentence exactly like the training reviews, then encode it with
# the fitted vectorizer. The document is passed in a list (an ordered sequence
# of documents) rather than a set, which is unordered and drops duplicates.
text = preprocessing(text)
vector = c.transform([text]).toarray()

In [None]:
# `model` is the loop variable left over from the training cell, i.e. the last
# estimator in `models` (LogisticRegression).
model.predict(vector)

array([1])