In [68]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding,Concatenate,Input,Bidirectional,LSTM,Dense,Dropout,GRU
from sklearn.model_selection import train_test_split

In [41]:
data = pd.read_csv('DATASET.csv')
print(data.head())
print(data.shape)

                                              Review     label
0  Great music service, the audio is high quality...  POSITIVE
1  Please ignore previous negative rating. This a...  POSITIVE
2  This pop-up "Get the best Spotify experience o...  NEGATIVE
3    Really buggy and terrible to use as of recently  NEGATIVE
4  Dear Spotify why do I get songs that I didn't ...  NEGATIVE
(52702, 2)


In [42]:
data.isna().sum()

Review    16
label      0
dtype: int64

In [43]:
data = data.dropna()
data.shape

(52686, 2)

In [53]:
data['Review'] = data['Review'].astype(str).str.replace(r'Ã[\x80-\xBF]+|[^a-zA-Z\s]|\s+', ' ', regex=True).str.strip().str.lower()

In [54]:
tokenized_reviews = []
for review in data['Review']:
    tokens = word_tokenize(review)
    tokenized_reviews.append(tokens)
data['Review Tokenized'] = tokenized_reviews
data.head()

Unnamed: 0,Review,label,Tokenized Text,Text Without Stopwords,Text Lemmatized,Review Tokenized
0,great music service the audio is high quality ...,POSITIVE,"[great, music, service, the, audio, is, high, ...","[great, music, service, audio, high, quality, ...","[great, music, service, audio, high, quality, ...","[great, music, service, the, audio, is, high, ..."
1,please ignore previous negative rating this ap...,POSITIVE,"[please, ignore, previous, negative, rating, t...","[please, ignore, previous, negative, rating, a...","[please, ignore, previous, negative, rat, app,...","[please, ignore, previous, negative, rating, t..."
2,this pop up get the best spotify experience on...,NEGATIVE,"[this, pop, up, get, the, best, spotify, exper...","[pop, get, best, spotify, experience, android,...","[pop, get, best, spotify, experience, android,...","[this, pop, up, get, the, best, spotify, exper..."
3,really buggy and terrible to use as of recently,NEGATIVE,"[really, buggy, and, terrible, to, use, as, of...","[really, buggy, terrible, use, recently]","[really, buggy, terrible, use, recently]","[really, buggy, and, terrible, to, use, as, of..."
4,dear spotify why do i get songs that i didn t ...,NEGATIVE,"[dear, spotify, why, do, i, get, songs, that, ...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, why, do, i, get, songs, that, ..."


In [55]:
stop_words = stopwords.words('english')
reviews_no_stopwords = []
for review in data['Review Tokenized']:
    review_no_stopwords = [word for word in review if word not in stop_words]
    reviews_no_stopwords.append(review_no_stopwords)
data['Review Without Stopwords'] = reviews_no_stopwords
data.head()

Unnamed: 0,Review,label,Tokenized Text,Text Without Stopwords,Text Lemmatized,Review Tokenized,Review Without Stopwords
0,great music service the audio is high quality ...,POSITIVE,"[great, music, service, the, audio, is, high, ...","[great, music, service, audio, high, quality, ...","[great, music, service, audio, high, quality, ...","[great, music, service, the, audio, is, high, ...","[great, music, service, audio, high, quality, ..."
1,please ignore previous negative rating this ap...,POSITIVE,"[please, ignore, previous, negative, rating, t...","[please, ignore, previous, negative, rating, a...","[please, ignore, previous, negative, rat, app,...","[please, ignore, previous, negative, rating, t...","[please, ignore, previous, negative, rating, a..."
2,this pop up get the best spotify experience on...,NEGATIVE,"[this, pop, up, get, the, best, spotify, exper...","[pop, get, best, spotify, experience, android,...","[pop, get, best, spotify, experience, android,...","[this, pop, up, get, the, best, spotify, exper...","[pop, get, best, spotify, experience, android,..."
3,really buggy and terrible to use as of recently,NEGATIVE,"[really, buggy, and, terrible, to, use, as, of...","[really, buggy, terrible, use, recently]","[really, buggy, terrible, use, recently]","[really, buggy, and, terrible, to, use, as, of...","[really, buggy, terrible, use, recently]"
4,dear spotify why do i get songs that i didn t ...,NEGATIVE,"[dear, spotify, why, do, i, get, songs, that, ...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, why, do, i, get, songs, that, ...","[dear, spotify, get, songs, put, playlist, shu..."


In [56]:
lemmatizer = WordNetLemmatizer()
lemmatized_reviews = []
for review in data['Review Without Stopwords']:
    lemmatized_review = [lemmatizer.lemmatize(word,pos='v') for word in review]
    lemmatized_reviews.append(lemmatized_review)
data['Review Lemmatized'] = lemmatized_reviews
data.head()

Unnamed: 0,Review,label,Tokenized Text,Text Without Stopwords,Text Lemmatized,Review Tokenized,Review Without Stopwords,Review Lemmatized
0,great music service the audio is high quality ...,POSITIVE,"[great, music, service, the, audio, is, high, ...","[great, music, service, audio, high, quality, ...","[great, music, service, audio, high, quality, ...","[great, music, service, the, audio, is, high, ...","[great, music, service, audio, high, quality, ...","[great, music, service, audio, high, quality, ..."
1,please ignore previous negative rating this ap...,POSITIVE,"[please, ignore, previous, negative, rating, t...","[please, ignore, previous, negative, rating, a...","[please, ignore, previous, negative, rat, app,...","[please, ignore, previous, negative, rating, t...","[please, ignore, previous, negative, rating, a...","[please, ignore, previous, negative, rat, app,..."
2,this pop up get the best spotify experience on...,NEGATIVE,"[this, pop, up, get, the, best, spotify, exper...","[pop, get, best, spotify, experience, android,...","[pop, get, best, spotify, experience, android,...","[this, pop, up, get, the, best, spotify, exper...","[pop, get, best, spotify, experience, android,...","[pop, get, best, spotify, experience, android,..."
3,really buggy and terrible to use as of recently,NEGATIVE,"[really, buggy, and, terrible, to, use, as, of...","[really, buggy, terrible, use, recently]","[really, buggy, terrible, use, recently]","[really, buggy, and, terrible, to, use, as, of...","[really, buggy, terrible, use, recently]","[really, buggy, terrible, use, recently]"
4,dear spotify why do i get songs that i didn t ...,NEGATIVE,"[dear, spotify, why, do, i, get, songs, that, ...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, why, do, i, get, songs, that, ...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, get, songs, put, playlist, shu..."


In [61]:
processed_reviews = []
for review in data['Review Lemmatized']:
    review = ' '.join(review)
    processed_reviews.append(review)
data['Processed Reviews'] = processed_reviews
data.head()

Unnamed: 0,Review,label,Tokenized Text,Text Without Stopwords,Text Lemmatized,Review Tokenized,Review Without Stopwords,Review Lemmatized,Processed Review,Processed Reviews
0,great music service the audio is high quality ...,POSITIVE,"[great, music, service, the, audio, is, high, ...","[great, music, service, audio, high, quality, ...","[great, music, service, audio, high, quality, ...","[great, music, service, the, audio, is, high, ...","[great, music, service, audio, high, quality, ...","[great, music, service, audio, high, quality, ...",great music service audio high quality app eas...,great music service audio high quality app eas...
1,please ignore previous negative rating this ap...,POSITIVE,"[please, ignore, previous, negative, rating, t...","[please, ignore, previous, negative, rating, a...","[please, ignore, previous, negative, rat, app,...","[please, ignore, previous, negative, rating, t...","[please, ignore, previous, negative, rating, a...","[please, ignore, previous, negative, rat, app,...",please ignore previous negative rat app super ...,please ignore previous negative rat app super ...
2,this pop up get the best spotify experience on...,NEGATIVE,"[this, pop, up, get, the, best, spotify, exper...","[pop, get, best, spotify, experience, android,...","[pop, get, best, spotify, experience, android,...","[this, pop, up, get, the, best, spotify, exper...","[pop, get, best, spotify, experience, android,...","[pop, get, best, spotify, experience, android,...",pop get best spotify experience android annoy ...,pop get best spotify experience android annoy ...
3,really buggy and terrible to use as of recently,NEGATIVE,"[really, buggy, and, terrible, to, use, as, of...","[really, buggy, terrible, use, recently]","[really, buggy, terrible, use, recently]","[really, buggy, and, terrible, to, use, as, of...","[really, buggy, terrible, use, recently]","[really, buggy, terrible, use, recently]",really buggy terrible use recently,really buggy terrible use recently
4,dear spotify why do i get songs that i didn t ...,NEGATIVE,"[dear, spotify, why, do, i, get, songs, that, ...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, why, do, i, get, songs, that, ...","[dear, spotify, get, songs, put, playlist, shu...","[dear, spotify, get, songs, put, playlist, shu...",dear spotify get songs put playlist shuffle play,dear spotify get songs put playlist shuffle play


In [62]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['Processed Reviews'])
vocab_size = len(tokenizer.word_index) + 1
vocab_size

17454

In [63]:
input_sequences = tokenizer.texts_to_sequences(data['Review'])
input_sequences = np.array(pad_sequences(input_sequences, padding='pre'))

In [64]:
y = data['label'].map({'POSITIVE':1, 'NEGATIVE':0})
y

0        1
1        1
2        0
3        0
4        0
        ..
52697    1
52698    1
52699    1
52700    1
52701    1
Name: label, Length: 52686, dtype: int64

In [65]:
x_train, x_test, y_train, y_test = train_test_split(input_sequences, y, test_size=0.2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25)

In [66]:
model = Sequential([
    Embedding(vocab_size,100),
    Bidirectional(LSTM(100, return_sequences=True)),
    Bidirectional(LSTM(250)),
    Dropout(0.2),
    Bidirectional(LSTM(250)),
    Bidirectional(LSTM(100)),
    Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [67]:
early_stopping = EarlyStopping(monitor='val_loss',mode='min',patience=3,verbose=1, restore_best_weights=True)
history = model.fit(x_train, y_train, epochs=50, validation_data=(x_val,y_val), verbose=1, callbacks=[early_stopping])

Epoch 1/50


ValueError: Input 0 of layer "bidirectional_14" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 500)

In [29]:
pred = model.predict(x_test)
accuracy = (pred==y_test) / len(y_test)

ValueError: Input 0 of layer "bidirectional_2" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (32, 500)