<a href="https://colab.research.google.com/github/Anshika0309/Anshika/blob/main/PRO_C115_Product_Review_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Load the dataset from Github repository
!git clone https://github.com/procodingclass/product_dataset.git

Cloning into 'product_dataset'...
remote: Enumerating objects: 11, done.[K
remote: Counting objects: 100% (11/11), done.[K
remote: Compressing objects: 100% (10/10), done.[K
remote: Total 11 (delta 0), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (11/11), 3.08 MiB | 13.30 MiB/s, done.


In [2]:
# Make a dataframe using the 'pandas' module
import pandas as pd
dataframe = pd.read_excel('/content/product_dataset/updated_product_dataset.xlsx')
print(dataframe.head())

    Emotion                                               Text
0  Positive  close approximation red octane mat bought one ...
1   Neutral  little lumpy mat great foam padding itâ€™s use...
2  Positive  great pad love ddr not want metal pad get work...
3  Positive  excellent pad great product highly responsive ...
4  Positive  awesome great ddr pad works perfectly pc stepm...


In [3]:
# Get unique emotions from the 'Emotion' column in the Dataset
dataframe["Emotion"].unique()

array(['Positive', 'Neutral', 'Negative'], dtype=object)

In [4]:
# Add Labels to the dataset
encode_emotions = {"Neutral": 0, "Positive": 1, "Negative": 2}

In [5]:
# replace the emotions with the Label
dataframe.replace(encode_emotions, inplace = True)
dataframe.head()

Unnamed: 0,Emotion,Text
0,1,close approximation red octane mat bought one ...
1,0,little lumpy mat great foam padding itâ€™s use...
2,1,great pad love ddr not want metal pad get work...
3,1,excellent pad great product highly responsive ...
4,1,awesome great ddr pad works perfectly pc stepm...


In [6]:
# Convert the dataframe into list for easy processing
training_sentences = []
training_labels = []

for i in range(len(dataframe)):
  sentence = dataframe.loc[i, "Text"]
  training_sentences.append(sentence)
  label = dataframe.loc[i, "Emotion"]
  training_labels.append(label)

# printing 10th element
training_sentences[10], training_labels[10]


('arrived early included blank case wont able test game get switch tried brothers device recognized card smash digital download already arrived ahead schedule well packed even blank case wasnt expectingread full review',
 1)

In [7]:
# Tokenize and pad your data
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

vocab_size = 10000
embedding_dim = 16
max_length = 100
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"
training_size = 20000

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)

word_index = tokenizer.word_index

training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

In [8]:
# Converting to numpy array
import numpy as np

training_padded = np.array(training_padded)
training_labels = np.array(training_labels)

In [16]:
# import the necessary modules from tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dropout


# Create the model
model = tf.keras.Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    Dropout(0.2),
    Conv1D(filters = 256, kernel_size=3, activation = "relu"),
    MaxPooling1D(pool_size=3),
    Conv1D(filters = 128, kernel_size=3, activation = "relu"),
    MaxPooling1D(pool_size=3),

    LSTM(128),
    Dense(128, activation = "relu"),
    Dropout(0.2),
    Dense(64, activation = "relu"),
    Dense(6, activation = "softmax")



])

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [18]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, 100, 16)           160000    
                                                                 
 dropout_12 (Dropout)        (None, 100, 16)           0         
                                                                 
 conv1d_12 (Conv1D)          (None, 98, 256)           12544     
                                                                 
 max_pooling1d_12 (MaxPooli  (None, 32, 256)           0         
 ng1D)                                                           
                                                                 
 conv1d_13 (Conv1D)          (None, 30, 128)           98432     
                                                                 
 max_pooling1d_13 (MaxPooli  (None, 10, 128)           0         
 ng1D)                                                

In [17]:
number_epochs = 30
history = model.fit(training_padded,training_labels,epochs=number_epochs,verbose=2)

Epoch 1/30
671/671 - 42s - loss: 0.3086 - accuracy: 0.9227 - 42s/epoch - 62ms/step
Epoch 2/30
671/671 - 34s - loss: 0.1729 - accuracy: 0.9448 - 34s/epoch - 50ms/step
Epoch 3/30
671/671 - 32s - loss: 0.1335 - accuracy: 0.9547 - 32s/epoch - 48ms/step
Epoch 4/30
671/671 - 32s - loss: 0.1081 - accuracy: 0.9618 - 32s/epoch - 48ms/step
Epoch 5/30
671/671 - 33s - loss: 0.0925 - accuracy: 0.9670 - 33s/epoch - 49ms/step
Epoch 6/30
671/671 - 31s - loss: 0.0776 - accuracy: 0.9716 - 31s/epoch - 47ms/step
Epoch 7/30
671/671 - 31s - loss: 0.0674 - accuracy: 0.9759 - 31s/epoch - 47ms/step
Epoch 8/30
671/671 - 35s - loss: 0.0571 - accuracy: 0.9808 - 35s/epoch - 52ms/step
Epoch 9/30
671/671 - 32s - loss: 0.0511 - accuracy: 0.9827 - 32s/epoch - 47ms/step
Epoch 10/30
671/671 - 33s - loss: 0.0489 - accuracy: 0.9834 - 33s/epoch - 50ms/step
Epoch 11/30
671/671 - 31s - loss: 0.0435 - accuracy: 0.9859 - 31s/epoch - 47ms/step
Epoch 12/30
671/671 - 32s - loss: 0.0399 - accuracy: 0.9876 - 32s/epoch - 47ms/step
E

In [19]:
# Save your Model
model.save("Project115.h5")

  saving_api.save_model(


In [28]:
sentence = ["I am happy to meet my friends. We are planning to go to a party","I had a bad day at school. I got hurt while playing footlball"]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(sequences,maxlen = max_length,padding = padding_type,truncating = trunc_type)
result = model.predict(padded)
print(result)
label = np.argmax(result, axis = 1)
label=int(label[0])
print(label)
# encode_emotions = {"Neutral": 0, "Positive":1, "Negative":2}
for emotion in encode_emotions:
  if encode_emotions[emotion]==label:
    print(f"sentiment:{emotion}, label : {label}")


[[3.5886288e-07 9.9999893e-01 7.7470406e-07 0.0000000e+00 0.0000000e+00
  0.0000000e+00]
 [7.9422182e-01 1.2192096e-01 8.3857201e-02 5.0902921e-10 3.2139849e-10
  7.6022399e-10]]
1
sentiment:Positive, label : 1
