# NLP Application

### This notebook builds a simple sentiment analysis model that classifies review comments as positive (Liked=1) or negative (Liked=0). A confusion matrix is used to evaluate the model's performance and to compute its accuracy.

In [1]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix

### File path and data reading

In [3]:
# Location of the labelled review data (columns: Review, Liked).
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of Restaurant_Reviews.csv before re-running the notebook.
DATA_PATH = r'C:\Users\Arif Furkan\OneDrive\Belgeler\Python_kullanirken\Restaurant_Reviews.csv'

try:
    # on_bad_lines='skip' drops malformed rows instead of aborting the load.
    Reviews = pd.read_csv(DATA_PATH, on_bad_lines='skip')
except pd.errors.ParserError as e:
    print("An error occurred while reading the CSV file:", e)
    # Re-raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down (losing all state and the traceback), whereas re-raising stops
    # "Run All" at this cell and shows exactly what went wrong.
    raise
else:
    # Preview the first rows only once the file has been parsed successfully.
    print(Reviews.head())

                                              Review  Liked
0                          Wow... Loved this place.       1
1                                Crust is not good.       0
2         Not tasty and the texture was just nasty.       0
3  Stopped by during the late May bank holiday of...      1
4  The selection on the menu was great and so wer...      1


### Checking and removing missing values

In [5]:
# Report the per-column count of missing values, drop every row that has
# at least one missing value, then report the counts again to confirm.
missing_before = Reviews.isnull().sum()
print(missing_before)

Reviews = Reviews.dropna()

missing_after = Reviews.isnull().sum()
print(missing_after)

Review    0
Liked     0
dtype: int64
Review    0
Liked     0
dtype: int64


### NLTK stopwords download

In [7]:
# Make sure the NLTK stop-word corpus is available locally; the preprocessing
# step reads it through stopwords.words('english').
nltk.download('stopwords')
# Porter stemmer instance used during preprocessing to reduce words to their stems.
ps = PorterStemmer()

[nltk_data] Downloading package stopwords to C:\Users\Arif
[nltk_data]     Furkan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Preprocessing

In [24]:
# Loop-invariant helpers, built once instead of once per token:
# the English stop-word set and the "anything that is not a letter" pattern.
stop_words = set(stopwords.words('english'))
non_letters = re.compile('[^a-zA-Z]')


def clean_review(text):
    """Normalise one raw review into a space-separated string of stems.

    Steps: replace every non-letter character with a space, lower-case,
    split on whitespace, drop English stop words, stem what remains with
    the Porter stemmer `ps`, and join the stems back with single spaces.
    """
    tokens = non_letters.sub(' ', text).lower().split()
    return ' '.join(ps.stem(token) for token in tokens if token not in stop_words)


# One cleaned string per review, in the same order as the rows of `Reviews`.
collection = [clean_review(text) for text in Reviews['Review']]

### Feature Extraction - Bag of Words (BOW)

In [11]:
# Bag-of-words features: the vocabulary is capped at the 2000 most frequent terms.
cv = CountVectorizer(max_features=2000)
bow_counts = cv.fit_transform(collection)  # sparse document-term count matrix
X = bow_counts.toarray()  # independent variables as a dense array
y = Reviews['Liked'].values  # dependent variable (the Liked label)

### Splitting the data set into training and test sets

In [13]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split reproducible across runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, test_size=0.20, random_state=0)

### Naive Bayes model training

In [15]:
# Gaussian Naive Bayes classifier, trained on the training split only.
gnb = GaussianNB()
gnb.fit(X=X_train, y=y_train)

### Prediction

In [18]:
# Predicted labels for the held-out test reviews.
y_pred = gnb.predict(X=X_test)

### Creating a confusion matrix

In [20]:
# Confusion matrix of the test split: rows are the true labels,
# columns are the predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[48 47]
 [18 87]]


### Confusion matrix analysis

In [22]:
# Accuracy = correctly classified samples (the diagonal of the confusion
# matrix) divided by the total number of samples.
correct_predictions = np.trace(cm)
total_predictions = np.sum(cm)
accuracy = correct_predictions / total_predictions
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 67.50%
