# Simple supervised NLP

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the Dataset

In [2]:
dataset = pd.read_csv("reviews_mixed.csv")

## Cleaning the text

In [3]:
import re
import nltk 
from nltk.corpus import stopwords 
from nltk.stem.porter import PorterStemmer 
from nltk.stem import WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\razva\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\razva\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
print(len(dataset))

207


In [5]:
corpus = []

for i in range(207):
  review = re.sub('[^a-zA-Z]', ' ', dataset['Text'][i])  
  review = review.lower()
  review = review.split()

  ps = PorterStemmer()
  lemmatizer = WordNetLemmatizer()
  my_stopwords = stopwords.words('english')
  my_stopwords.remove('not')
#   review = [ps.stem(word) for word in review if not word in set(my_stopwords)]
  # review = [lemmatizer.lemmatize(word) for word in review if not word in set(my_stopwords)]
  review = [word for word in review if not word in set(my_stopwords)]
  review = ' '.join(review)
  corpus.append(review)

In [19]:
print(corpus)

['room extremely small practically bed', 'room safe not work', 'mattress comfortable', 'uncomfortable thin mattress plastic cover rustle every time move', 'bathroom room', 'bed soooo comfy', 'someone must smoking room next door', 'bed comfortable', 'spacious room quiet comfortable', 'people bedroom sofa bed bit unconfortable', 'light common room dim', 'air conditioning working fine', 'type like let water run bit getting wet take minute figure make hot gonna get wet', 'window single glazed heat could escape although fair outside', 'terrible small cubbyhole marketed room', 'corridor filthy room filthy electrical cable room not safe whole building smelly shower repulsive', 'wall seem sound insulation', 'gym small basic', 'mattress springy uncomfortable', 'light comfy bed unbeatable', 'show wear tear', 'think didnt work well air tv open window', 'microwave needed', 'room cleaned bed made', 'room cable tv safe iron hairdryer free coffee tea downstairs area', 'heat room fluctuated time felt 

## Data preprocessing

In [6]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, -1].values

In [7]:
print(len(X[0]))

527


In [8]:
from sklearn.preprocessing import LabelBinarizer 

new_y=[] 
for el in y: 
    if el=='negative': 
        new_y.append(0)
    else: 
        new_y.append(1)
y=np.array(new_y)
print(y)

[0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 0
 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1
 0 0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 0 0 0
 0 1 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1 0 1 0 0
 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 0 0 1 1 0 0 0 0
 1 1 0 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0]


## Splitting into Training and Test set

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

## Building an ANN

In [10]:
import tensorflow as tf

### Initializing

In [11]:
ann = tf.keras.models.Sequential()

### Adding the first hidden layer

In [12]:
ann.add(tf.keras.layers.Dense(units=60, activation='relu'))

### Adding the second hidden layer

In [13]:
ann.add(tf.keras.layers.Dense(units=60, activation='relu'))

### The output layer

In [14]:
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

## Training the ANN

### Compiling

In [15]:
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [16]:
ann.fit(X_train, y_train, batch_size = 32, epochs = 15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x1ea94925400>

In [17]:
y_pred = ann.predict(X_test)
y_pred_labels = []

for el in y_pred: 
    if el >0.5: 
        y_pred_labels.append(1)
    else:
        y_pred_labels.append(0)


## Confusion Matrix

In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_pred_labels, y_test)
print(cm)
accuracy_score(y_pred_labels, y_test)

[[29  9]
 [ 1  3]]


0.7619047619047619