In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.preprocessing.text as tfkpt
from tensorflow.keras.preprocessing.text import Tokenizer
import json
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

Using TensorFlow backend.


In [2]:
reviews_df = pd.read_csv('ecommerce_reviews.csv')
reviews_df.head(5)

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses


In [3]:
reviews_df.describe()

Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Rating,Recommended IND,Positive Feedback Count
count,23486.0,23486.0,23486.0,23486.0,23486.0,23486.0
mean,11742.5,918.118709,43.198544,4.196032,0.822362,2.535936
std,6779.968547,203.29898,12.279544,1.110031,0.382216,5.702202
min,0.0,0.0,18.0,1.0,0.0,0.0
25%,5871.25,861.0,34.0,4.0,1.0,0.0
50%,11742.5,936.0,41.0,5.0,1.0,1.0
75%,17613.75,1078.0,52.0,5.0,1.0,3.0
max,23485.0,1205.0,99.0,5.0,1.0,122.0


In [4]:
reviews_df = reviews_df[['Review Text','Rating']].sort_values(by = 'Rating')

In [5]:
reviews_df.describe()

Unnamed: 0,Rating
count,23486.0
mean,4.196032
std,1.110031
min,1.0
25%,4.0
50%,5.0
75%,5.0
max,5.0


In [6]:
def create_sentiment(int):
    if int >=4 and int<=5:
        return 2
    if int == 3:
        return 1
    if int >=1 and int <=2:
        return 0

reviews_df['sentiment'] = reviews_df['Rating'].apply(create_sentiment)

In [7]:
def fix_floats(score):
    return int(score)

In [8]:
reviews_df.sample(5)

Unnamed: 0,Review Text,Rating,sentiment
21026,I love this tank! its something you can dress ...,5,2
7959,The dress is exactly as pictured. i bought it ...,5,2
14395,The dress is very feminine and very flirty. i...,4,2
15753,This too is great to throw on when running to ...,4,2
10027,"I typically wear a 4 in dresses, and sized up ...",4,2


In [9]:
tf.config.experimental.list_physical_devices('GPU') 
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  0


In [10]:
reviews_df.columns = reviews_df.columns.str.replace(' ', '_')

In [11]:
reviews_training_df = reviews_df[['Review_Text', 'sentiment']].dropna()
reviews_training_df.sample(5)

Unnamed: 0,Review_Text,sentiment
14460,Loved the idea of this top--but it just won't ...,1
8909,I love this blouse with a pair of jeans. the n...,2
11339,I wanted to love this dress - everything about...,2
21838,I love this dress! i will be living in it this...,2
16846,Bought it on sale and i am keeping it. it is w...,1


In [12]:
reviews_sentiment = reviews_training_df.sentiment
print(reviews_sentiment)


20289    0
14201    0
22346    0
23019    0
18740    0
        ..
10078    2
10079    2
10080    2
10082    2
23485    2
Name: sentiment, Length: 22641, dtype: int64


In [13]:
reviews_text = reviews_training_df.Review_Text
print(reviews_text)

20289    I purchased these in january and today i am go...
14201    This coat is awful, just awful. i bought it ba...
22346    I followed the other reviews and sized down. n...
23019    This skirt is definitely not what i was expect...
18740    Quality was not what you expect from retailer,...
                               ...                        
10078    This is the perfect tank. it's super soft, gre...
10079    These tanks are very soft. i have two black an...
10080    Love the cut of this t-shirt. just loose enoug...
10082    But when i saw this top in person, urged by my...
23485    This dress in a lovely platinum is feminine an...
Name: Review_Text, Length: 22641, dtype: object


In [None]:
tokenizer=Tokenizer()
tokenizer.fit_on_texts(reviews_text)
tokenizer.get_config()

In [None]:
reviews_text = tokenizer.texts_to_sequences(reviews_text)
reviews_text

In [16]:
reviews_text = tokenizer.sequences_to_matrix(reviews_text)
reviews_text

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [17]:
model = Sequential()

In [18]:

model.add(Dense(500, activation='sigmoid'))
model.add(Dropout(0.25))
model.add(Dense(350, activation='relu'))
model.add(Dropout(0.33))
model.add(Dense(200, activation='elu'))
model.add(Dropout(0.5))
model.add(Dense(50, activation='softsign'))
model.add(Dense(3, activation='softmax'))

In [19]:
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [20]:
reviews_sentiment = keras.utils.to_categorical(reviews_sentiment, 3)

In [21]:
model.fit(reviews_text, reviews_sentiment, batch_size=37, epochs=7, verbose=1, validation_split=0.2, shuffle=True)

Train on 18112 samples, validate on 4529 samples
Epoch 1/7
Epoch 2/7
Epoch 3/7
Epoch 4/7
Epoch 5/7
Epoch 6/7
Epoch 7/7


<keras.callbacks.callbacks.History at 0x145be9ed0>

In [22]:
reviews_rating_model = model.to_json()
with open('reviews_ratings_model.json', 'w') as json_file:
    json_file.write(reviews_rating_model)
model.save_weights('reviews_ratings_model.h5')