In [59]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.preprocessing.text as tfkpt
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split


In [60]:
clothing_reviews_df = pd.read_csv('womens-ecommerce-clothing-reviews/Womens Clothing E-Commerce Reviews.csv')

In [61]:
ranked_reviews = clothing_reviews_df[["Review Text", "Rating"]].sort_values(by="Rating", ascending=False)
ranked_reviews

Unnamed: 0,Review Text,Rating
11743,I absolutely love this vest. it fits extremely...,5
13393,Byron lars knows how to make you look like a w...,5
13397,,5
13398,My mom pointed out this dress in the store a w...,5
13399,Fits perfect!,5
...,...,...
19304,I have been continually disappointed in retail...,1
19288,Didn't like the dress. look pregnant in it. th...,1
19283,This top is crazy in the front and just way-of...,1
19275,"This looked like a simple, chic blouse, but th...",1


In [62]:
def create_sentiment(int):
    if int == 4 or int == 5:
        return 2
    if int == 3:
        return 1
    if int == 2 or int == 1:
        return 0

In [63]:
ranked_reviews['Sentiment'] = ranked_reviews['Rating'].apply(create_sentiment)

In [64]:
ranked_reviews.sample(40)

Unnamed: 0,Review Text,Rating,Sentiment
11519,I was so excited for this coat when i saw it i...,3,1
19041,I happened upon a pair of these shorts that so...,5,2
16185,"I love, love, love this brand. these people kn...",5,2
22056,"Very flattering and love style, print and colo...",5,2
13243,"This dress is very comfortable, lovely, and ve...",5,2
14456,"This blouse is beautiful, and very versatile, ...",5,2
6149,I felt like i was taking a risk by ordering th...,5,2
1902,"These are the most comfortable, happening pant...",5,2
5151,I'm wearing this shirt right now. i'm usually ...,5,2
21500,The fit of the dress is fabulous! it is so fla...,4,2


In [65]:
ranked_reviews.columns = ['Review Text', 'Rating', 'Sentiment']
review_training = ranked_reviews[['Review Text', 'Sentiment']].dropna()

In [66]:
review_y = review_training['Sentiment']
review_y

11743    2
13393    2
13398    2
13399    2
13405    2
        ..
19304    0
19288    0
19283    0
19275    0
10147    0
Name: Sentiment, Length: 22641, dtype: int64

In [67]:
review_x = review_training['Review Text']
review_x

11743    I absolutely love this vest. it fits extremely...
13393    Byron lars knows how to make you look like a w...
13398    My mom pointed out this dress in the store a w...
13399                                        Fits perfect!
13405    I ordered the navy/green combo in xl and it's ...
                               ...                        
19304    I have been continually disappointed in retail...
19288    Didn't like the dress. look pregnant in it. th...
19283    This top is crazy in the front and just way-of...
19275    This looked like a simple, chic blouse, but th...
10147    I've been shopping at retailer since 2007, and...
Name: Review Text, Length: 22641, dtype: object

In [68]:

X_train, X_test, y_train, y_test = train_test_split(review_x, review_y, test_size=0.33)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(15169,) (15169,)
(7472,) (7472,)


In [69]:
review_training.describe()

Unnamed: 0,Sentiment
count,22641.0
mean,1.66596
std,0.657139
min,0.0
25%,2.0
50%,2.0
75%,2.0
max,2.0


In [70]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_test)
X_train = tokenizer.texts_to_sequences(X_train)
X_train = tokenizer.sequences_to_matrix(X_train)
X_train

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.]])

In [72]:
model = Sequential()
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(75, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(25, activation='sigmoid'))
model.add(Dropout(0.3))

In [73]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [74]:
y_train = keras.utils.to_categorical(y_train, 25)

In [75]:
model.fit(X_train, y_train, epochs=7, batch_size=37, validation_split=0.2, shuffle=True)

Train on 12135 samples, validate on 3034 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.callbacks.History at 0x7f1a91d1b2b0>

In [76]:
print(model.evaluate(X_train, y_train))

[0.019616315671171492, 0.9929617643356323]


In [77]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_test)
X_test = tokenizer.texts_to_sequences(X_test)
X_test = tokenizer.sequences_to_matrix(X_test)
X_test

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 1., 1., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 1.]])

In [78]:
model = Sequential()
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(75, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(25, activation='sigmoid'))
model.add(Dropout(0.3))

In [56]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [57]:
y_test = keras.utils.to_categorical(y_test, 25)

In [58]:
model.fit(X_test, y_test, epochs=7, batch_size=37, validation_split=0.2, shuffle=True)

ValueError: Please provide as model inputs either a single array or a list of arrays. You passed: x=10500    Great quality top. i do wish it fit me...i'm r...
4214     This jacket is surprisingly soft and comfy. i'...
7616     Love this top, it looks just as pictured. the ...
2139     This top is super cute and is very flattering....
7253     I absolutely adore this cardigan. it's easy, s...
                               ...                        
20732    I bought the white in a m, one size down from ...
3173     The quality of he fabric is amazing. the thick...
22494    I love the serif jeans and these stet mid-rise...
19132    Saw this on the retailer emails as a cover and...
18962    Absolutely gorgeous top! i have purchased so m...
Name: Review Text, Length: 7472, dtype: object

In [None]:
print('gpu available', len(tf.config.experimental.list_physical_devices('GPU')))