# IMPORT LIBRARIES

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report,precision_score
import re
import string
import tensorflow as tf


from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud

# LOADING DATA SET

In [2]:
df=pd.read_csv('Women Dresses Reviews Dataset .csv')

In [3]:
df.head()

Unnamed: 0,s.no,age,division_name,department_name,class_name,clothing_id,title,review_text,alike_feedback_count,rating,recommend_index
0,0,40,General,Bottoms,Jeans,1028,Amazing fit and wash,Like other reviewers i was hesitant to spend t...,0,5,1
1,1,62,General Petite,Tops,Blouses,850,Lovely and unique!,As is true of a bunch of the fall clothing pho...,12,5,1
2,2,47,General Petite,Bottoms,Skirts,993,Meh,"I so wanted this skirt to work, love the desig...",3,1,0
3,3,45,General Petite,Bottoms,Pants,1068,Wow,Love love this! i was hesitant to buy this at ...,0,5,1
4,4,37,Initmates,Intimate,Swim,24,Great for bigger busts,I absolutely love the retro look of this swims...,0,5,1


In [4]:
df.tail()

Unnamed: 0,s.no,age,division_name,department_name,class_name,clothing_id,title,review_text,alike_feedback_count,rating,recommend_index
23481,23481,44,General Petite,Dresses,Dresses,1081,Love it!,I oot this dress in the blue. it fits great--h...,0,5,1
23482,23482,39,General,Dresses,Dresses,1110,Great piece,I was very patient with this dress. i was wait...,1,5,1
23483,23483,29,General Petite,Tops,Knits,862,So soft and flattering,"The deep v doesn't gape, and flatters the neck...",0,5,1
23484,23484,57,General,Dresses,Dresses,1082,Another winner from isabella sinclair,"I saw this dress online this morning, went int...",10,5,1
23485,23485,56,General,Jackets,Jackets,984,,Super cute jacket .perfect for fall i can't st...,0,5,1


In [5]:
df.columns

Index(['s.no', 'age', 'division_name', 'department_name', 'class_name',
       'clothing_id', 'title', 'review_text', 'alike_feedback_count', 'rating',
       'recommend_index '],
      dtype='object')

In [6]:
df.shape

(23486, 11)

In [7]:
df.isna().sum()

s.no                       0
age                        0
division_name             14
department_name           14
class_name                14
clothing_id                0
title                   3810
review_text              845
alike_feedback_count       0
rating                     0
recommend_index            0
dtype: int64

In [9]:
df = df.dropna(subset=['review_text'])

In [10]:
df.isna().sum()

s.no                       0
age                        0
division_name             13
department_name           13
class_name                13
clothing_id                0
title                   2966
review_text                0
alike_feedback_count       0
rating                     0
recommend_index            0
dtype: int64

In [12]:
df=df.drop(['s.no', 'age', 'division_name', 'department_name', 'class_name','clothing_id', 'title', 'alike_feedback_count', 'rating'],axis=1)

In [13]:
df

Unnamed: 0,review_text,recommend_index
0,Like other reviewers i was hesitant to spend t...,1
1,As is true of a bunch of the fall clothing pho...,1
2,"I so wanted this skirt to work, love the desig...",0
3,Love love this! i was hesitant to buy this at ...,1
4,I absolutely love the retro look of this swims...,1
...,...,...
23481,I oot this dress in the blue. it fits great--h...,1
23482,I was very patient with this dress. i was wait...,1
23483,"The deep v doesn't gape, and flatters the neck...",1
23484,"I saw this dress online this morning, went int...",1


In [14]:
df.sample(frac=1)

Unnamed: 0,review_text,recommend_index
13691,These wide cut stretchy demon trousers are so ...,1
2310,"I ordered this dress in two sizes, pxs and ps....",0
21051,Well fitted kinda slouchy sweater. matches pho...,1
6111,This dress is just absolutely adorable! you ca...,1
11834,I tried a size x small on in the store and i a...,0
...,...,...
16959,The cut is wonderful and the fabric is soft an...,1
20097,"I have to admit, seeing these tops online, i w...",1
17383,I've been on the hunt for the perfect dress to...,1
6034,Love this sweater! bought it in pink and grey ...,1


In [15]:
df.shape

(22641, 2)

In [16]:
df.describe()

Unnamed: 0,recommend_index
count,22641.0
mean,0.818868
std,0.385136
min,0.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


# Preprocessing Text Data

In [17]:
def wordopt(text): 
    #creating function to process text
    text=text.lower()
    text=re.sub('\[.*?\]','',text)
    text=re.sub("\\W"," ",text)
    text=re.sub('https?://\S+|www\.\S+','',text)
    text=re.sub('<.*?>+','',text)
    text=re.sub('[%s]'% re.escape(string.punctuation),'',text)
    text=re.sub('\n','',text)
    text=re.sub('\w*\d\w*','',text)
    return text

In [19]:
df['review_text']=df['review_text'].apply(wordopt)

In [25]:
features = df['review_text']
targets = df['recommend_index ']

In [26]:
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.20, random_state=18)

In [27]:
from tensorflow.keras.preprocessing.text import Tokenizer
max_vocab = 10000
tokenizer = Tokenizer(num_words=max_vocab)
tokenizer.fit_on_texts(X_train)

In [28]:
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

In [29]:
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, padding='post', maxlen=256)
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, padding='post', maxlen=256)

In [30]:
max_len=256
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_vocab, output_dim=32, input_length=max_len),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)),
    
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1)
])


model.build(input_shape=(None, max_len))
model.summary()



In [31]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [32]:
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)


Epoch 1/5
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 168ms/step - accuracy: 0.7161 - loss: 0.7232 - val_accuracy: 0.8501 - val_loss: 0.4071
Epoch 2/5
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 171ms/step - accuracy: 0.8245 - loss: 2.4531 - val_accuracy: 0.8211 - val_loss: 2.8514
Epoch 3/5
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 171ms/step - accuracy: 0.8170 - loss: 1.5778 - val_accuracy: 0.8548 - val_loss: 0.6486
Epoch 4/5
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 189ms/step - accuracy: 0.8079 - loss: 0.5489 - val_accuracy: 0.8222 - val_loss: 0.5536
Epoch 5/5
[1m453/453[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 169ms/step - accuracy: 0.7978 - loss: 0.5864 - val_accuracy: 0.6718 - val_loss: 0.6109


In [33]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m142/142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 53ms/step - accuracy: 0.6832 - loss: 0.6095
Test Accuracy: 68.32%
