In [1]:
import pandas as pd



In [2]:
# Load the raw review export; the path is resolved relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')

In [3]:
# Keep only the review text and its rating. The explicit .copy() makes `df` an
# independent frame, so column assignments in later cells write to it directly
# instead of triggering pandas' SettingWithCopyWarning on a derived subset.
df = df[['reviews.text', 'reviews.rating']].copy()

In [4]:
# Binarise the rating into a 0/1 label: ratings 1-3 -> 0, ratings 4-5 -> 1.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on `df['reviews.rating']`: that is chained in-place
# mutation of a temporary Series, which leaves `df` unchanged under pandas
# copy-on-write.
# NOTE: not idempotent - running this cell a second time maps the new label 1 to 0;
# re-run from the data-loading cell instead.
df['reviews.rating'] = df['reviews.rating'].replace({1: 0, 2: 0, 3: 0, 4: 1, 5: 1})

In [5]:
# Remove English stop words from the review text

In [6]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [7]:
# NLTK's English stop-word list, held in a set so that membership checks
# during preprocessing do not scan a list.
english_stopword_list = stopwords.words('english')
stop_words = set(english_stopword_list)

In [8]:
def preprocess(text):
    """Return `text` with English stop words removed.

    The text is split with NLTK's `word_tokenize`; every token whose
    lower-cased form is in `stop_words` is dropped, and the remaining tokens
    (original casing and punctuation tokens included) are re-joined with
    single spaces.

    Parameters
    ----------
    text : str or missing value
        A single review. `None` and float NaN (how pandas represents an empty
        CSV cell) are treated as an empty review.

    Returns
    -------
    str
        The filtered text; `''` for missing input.
    """
    # NaN is the only float that is not equal to itself, so this check needs no imports.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    word_tokens = word_tokenize(text)
    kept = [w for w in word_tokens if w.lower() not in stop_words]
    return ' '.join(kept)

In [9]:
# Run stop-word removal over every review and store the result in a new 'filtered' column.
df['filtered'] = [preprocess(review) for review in df['reviews.text']]

In [10]:
# Sanity check: show the filtered reviews (pandas truncates the display to the head and tail rows).
df.loc[:, 'filtered']

0       thought would big small paper turn like palm ....
1            kindle light easy use especially beach ! ! !
2       Didnt know much 'd use kindle went lower end ....
3       100 happy purchase . caught sale really good p...
4       Solid entry level Kindle . Great kids . Gifted...
                              ...                        
4995                 great tablet price . Amazon good job
4996    tablet perfect size easy use . Read , play gam...
4997    Purchased son . room upgrade memory allow book...
4998    thoughts getting 5 year old , get screen prote...
4999                 steal , 8 gb model well.This punch..
Name: filtered, Length: 5000, dtype: object

In [11]:
# Model 1: simple feed-forward neural network on bag-of-words counts

In [12]:
from sklearn.feature_extraction.text import CountVectorizer

In [13]:
# Bag-of-words vectorizer, constructed with scikit-learn's default settings (no arguments passed).
cv = CountVectorizer()

In [14]:
# Learn the vocabulary and count each term per review, then densify the sparse
# result so it can be fed straight into the Keras Dense layers.
# NOTE(review): the vocabulary is fitted on all rows before the train/test split,
# so words that only occur in held-out reviews still get a feature column.
counts_sparse = cv.fit_transform(df['filtered'])
text = counts_sparse.toarray()

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# Hold out 20% of the rows; the fixed random_state keeps the shuffled split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    text,
    df['reviews.rating'],
    test_size=0.2,
    random_state=8,
    shuffle=True,
)

In [17]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

In [18]:
# Feed-forward binary classifier: two ReLU hidden layers followed by a single
# sigmoid unit. No input shape is declared, so the input width is taken from
# the data the model is first called on.
model = Sequential()
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [19]:
# Binary cross-entropy matches the single sigmoid output. `metrics` is passed as a
# list, the form documented for Model.compile; newer Keras versions reject a bare string.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [20]:
# Train for 10 epochs, evaluating on the held-out split after every epoch.
# NOTE(review): the held-out split is used as validation data here, so the
# reported validation accuracy is not an untouched final test score.
model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1eb135b78d0>

In [21]:
# Model 2: LSTM on padded token-id sequences

In [22]:
from tensorflow.keras.preprocessing.text import Tokenizer

In [23]:
# Word-level tokenizer; `num_words` restricts the generated sequences to the most
# frequent words (word indices below 500), rarer words are dropped.
tokenizer = Tokenizer(num_words=500)

In [24]:
# Build the word -> index mapping (ranked by frequency) from the filtered reviews.
tokenizer.fit_on_texts(texts=df['filtered'])

In [25]:
# Encode every review as a list of integer word indices (variable length at this point).
# Note that this rebinds `text`, which previously held the bag-of-words matrix.
text = tokenizer.texts_to_sequences(texts=df['filtered'])

In [26]:
# Number of rows needed in the embedding table. Index 0 is reserved (it is never
# assigned to a word and is used for padding), hence the +1 on the full word index.
# When the tokenizer has `num_words` set, texts_to_sequences only emits indices
# below `num_words`, so sizing the table to the full word index would allocate
# rows that can never be looked up or trained.
full_vocab_size = len(tokenizer.word_index) + 1
vocab_size = min(full_vocab_size, tokenizer.num_words) if tokenizer.num_words else full_vocab_size

In [27]:
# Imported from tensorflow.keras like every other Keras import in this notebook, so the
# standalone `keras` package (which may be a different version) is not mixed in.
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Bring every review to exactly `maxlen` token ids: shorter sequences are zero-padded
# at the end (padding='post'); longer ones are cut using Keras' default truncation.
maxlen = 100
text = pad_sequences(text, padding='post', maxlen=maxlen)

In [28]:
# Same 80/20 split and seed as for the bag-of-words model, now over the padded sequences.
X_train, X_test, Y_train, Y_test = train_test_split(
    text,
    df['reviews.rating'],
    test_size=0.2,
    random_state=8,
    shuffle=True,
)

In [29]:
# Flatten is not used by this model; it stays in the import because the CNN cell
# further down uses it without importing it again.
from tensorflow.keras.layers import LSTM, Embedding, Flatten

embedding_dim = 50

# Embedding -> LSTM -> small dense head with a sigmoid output for the binary label.
# The LSTM returns only its final state, a 2-D (batch, units) tensor, so the Flatten
# layer that used to follow it was a no-op and has been removed.
# NOTE(review): activation='relu' overrides the LSTM's default tanh; kept as in the
# original experiment.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen),
    LSTM(8, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# `metrics` is passed as a list, the form documented for Model.compile; newer Keras
# versions reject a bare string.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [30]:
# Train the LSTM model for 10 epochs, evaluating on the held-out split after every epoch.
model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1eb148a9fd0>

In [31]:
# Model 3: 1-D CNN on the same padded token-id sequences

In [32]:
from tensorflow.keras.layers import Conv1D

embedding_dim = 50

# Embedding -> one 1-D convolution (8 filters, window of 5 tokens) -> flatten the
# per-position feature maps -> small dense head with a sigmoid output.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen),
    Conv1D(filters=8, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# `metrics` is passed as a list, the form documented for Model.compile; newer Keras
# versions reject a bare string.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [33]:
# Train the CNN model for 10 epochs, evaluating on the held-out split after every epoch.
model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_test, Y_test),
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1eb0155ddd8>