## IMPORTING LIBRARIES:

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report,precision_score
import re
import string


from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud


## LOADING DATA SET:

In [2]:
df = pd.read_csv(r"Women Dresses Reviews Dataset  (1).xls")
df

Unnamed: 0,s.no,age,division_name,department_name,class_name,clothing_id,title,review_text,alike_feedback_count,rating,recommend_index
0,0,40,General,Bottoms,Jeans,1028,Amazing fit and wash,Like other reviewers i was hesitant to spend t...,0,5,1
1,1,62,General Petite,Tops,Blouses,850,Lovely and unique!,As is true of a bunch of the fall clothing pho...,12,5,1
2,2,47,General Petite,Bottoms,Skirts,993,Meh,"I so wanted this skirt to work, love the desig...",3,1,0
3,3,45,General Petite,Bottoms,Pants,1068,Wow,Love love this! i was hesitant to buy this at ...,0,5,1
4,4,37,Initmates,Intimate,Swim,24,Great for bigger busts,I absolutely love the retro look of this swims...,0,5,1
...,...,...,...,...,...,...,...,...,...,...,...
23481,23481,44,General Petite,Dresses,Dresses,1081,Love it!,I oot this dress in the blue. it fits great--h...,0,5,1
23482,23482,39,General,Dresses,Dresses,1110,Great piece,I was very patient with this dress. i was wait...,1,5,1
23483,23483,29,General Petite,Tops,Knits,862,So soft and flattering,"The deep v doesn't gape, and flatters the neck...",0,5,1
23484,23484,57,General,Dresses,Dresses,1082,Another winner from isabella sinclair,"I saw this dress online this morning, went int...",10,5,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23486 entries, 0 to 23485
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   s.no                  23486 non-null  int64 
 1   age                   23486 non-null  int64 
 2   division_name         23472 non-null  object
 3   department_name       23472 non-null  object
 4   class_name            23472 non-null  object
 5   clothing_id           23486 non-null  int64 
 6   title                 19676 non-null  object
 7   review_text           22641 non-null  object
 8   alike_feedback_count  23486 non-null  int64 
 9   rating                23486 non-null  int64 
 10  recommend_index       23486 non-null  int64 
dtypes: int64(6), object(5)
memory usage: 2.0+ MB


In [4]:
df.size

258346

In [5]:
df.shape

(23486, 11)

In [6]:
df.isnull().sum()

s.no                       0
age                        0
division_name             14
department_name           14
class_name                14
clothing_id                0
title                   3810
review_text              845
alike_feedback_count       0
rating                     0
recommend_index            0
dtype: int64

In [7]:
df=df.drop(['s.no','age','division_name','department_name','class_name','clothing_id','title','alike_feedback_count','rating'],axis=1)
df

Unnamed: 0,review_text,recommend_index
0,Like other reviewers i was hesitant to spend t...,1
1,As is true of a bunch of the fall clothing pho...,1
2,"I so wanted this skirt to work, love the desig...",0
3,Love love this! i was hesitant to buy this at ...,1
4,I absolutely love the retro look of this swims...,1
...,...,...
23481,I oot this dress in the blue. it fits great--h...,1
23482,I was very patient with this dress. i was wait...,1
23483,"The deep v doesn't gape, and flatters the neck...",1
23484,"I saw this dress online this morning, went int...",1


In [8]:
df.columns=["text","recommend"]
df

Unnamed: 0,text,recommend
0,Like other reviewers i was hesitant to spend t...,1
1,As is true of a bunch of the fall clothing pho...,1
2,"I so wanted this skirt to work, love the desig...",0
3,Love love this! i was hesitant to buy this at ...,1
4,I absolutely love the retro look of this swims...,1
...,...,...
23481,I oot this dress in the blue. it fits great--h...,1
23482,I was very patient with this dress. i was wait...,1
23483,"The deep v doesn't gape, and flatters the neck...",1
23484,"I saw this dress online this morning, went int...",1


## PRE PROCESSING TEXT DATA:

In [9]:
df["text"]=df["text"].astype(str)

In [10]:
def wordopt(text): 
    #creating function to process text
    text=text.lower()
    text=re.sub('\[.*?\]','',text)
    text=re.sub("\\W"," ",text)
    text=re.sub('https?://\S+|www\.\S+','',text)
    text=re.sub('<.*?>+','',text)
    text=re.sub('[%s]'% re.escape(string.punctuation),'',text)
    text=re.sub('\n','',text)
    text=re.sub('\w*\d\w*','',text)
    return text

In [11]:
df['text']=df['text'].apply(wordopt)

In [12]:
X =df['text']
Y =df['recommend']

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=18)

In [14]:
from tensorflow.keras.preprocessing.text import Tokenizer
max_vocab = 10000
tokenizer = Tokenizer(num_words=max_vocab)
tokenizer.fit_on_texts(X_train)

In [15]:
# tokenize the text into vectors 
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

In [16]:
import tensorflow as tf

In [17]:
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, padding='post', maxlen=256)
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, padding='post', maxlen=256)

In [18]:
max_len=256
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=max_vocab, output_dim=32, input_length=max_len),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1)
])
model.build(input_shape=(None, max_len))
model.summary()



In [19]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [20]:
history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

Epoch 1/5
[1m470/470[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 436ms/step - accuracy: 0.6725 - loss: 0.7169 - val_accuracy: 0.8467 - val_loss: 0.4256
Epoch 2/5
[1m470/470[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 428ms/step - accuracy: 0.8079 - loss: 1.2045 - val_accuracy: 0.8222 - val_loss: 0.5059
Epoch 3/5
[1m470/470[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m201s[0m 427ms/step - accuracy: 0.6540 - loss: 0.8017 - val_accuracy: 0.8222 - val_loss: 0.5067
Epoch 4/5
[1m470/470[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 748ms/step - accuracy: 0.6456 - loss: 0.6964 - val_accuracy: 0.8222 - val_loss: 0.4767
Epoch 5/5
[1m470/470[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 433ms/step - accuracy: 0.7834 - loss: 0.6063 - val_accuracy: 0.8520 - val_loss: 0.6333


In [21]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# Print test accuracy
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 133ms/step - accuracy: 0.8412 - loss: 0.6216
Test Accuracy: 84.12%
