# TensorFlow Lite: Sentiment Analysis Using Amazon Reviews

...

## Package import

In [1]:
# Silence all warnings so library notices do not clutter the cell outputs.
# One 'ignore' filter is enough: filterwarnings() inserts new filters at the
# front of the filter list, so the 'always' filter that used to be registered
# just before it was shadowed immediately and never took effect.
import warnings
warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
#configure
# Render matplotlib figures inline, directly below the cell that produced them.
%matplotlib inline  
# Apply the 'fivethirtyeight' matplotlib style sheet, then seaborn's 'whitegrid' style.
style.use('fivethirtyeight')
sns.set(style='whitegrid',color_codes=True)

!pip uninstall -q -y tensorflow google-colab grpcio
!pip install -q tf-nightly
#!pip install -q git+https://github.com/tensorflow/examples



In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import os
import tensorflow as tf
# Fail fast unless the installed TensorFlow reports a 2.x version string.
assert tf.__version__.startswith('2')
import sys

# Directory that must be on sys.path for the `tensorflow_examples` imports
# below to resolve (presumably a local checkout of github.com/tensorflow/examples
# -- confirm). Set the TF_EXAMPLES_DIR environment variable to use a different
# location; the default keeps the original machine-specific path.
EXAMPLES_DIR = os.environ.get('TF_EXAMPLES_DIR', 'C:/Users/Adrian/Documents/examples')
# Guarded so that re-running the cell does not pile up duplicate entries.
if EXAMPLES_DIR not in sys.path:
    sys.path.append(EXAMPLES_DIR)

from tensorflow_examples.lite.model_customization.core.data_util.text_dataloader import TextClassifierDataLoader
from tensorflow_examples.lite.model_customization.core.model_export_format import ModelExportFormat
import tensorflow_examples.lite.model_customization.core.task.text_classifier as text_classifier

# `from_panda` comes from the `textdata_extension` module (not part of this
# notebook; presumably a local helper file next to it -- confirm).
from textdata_extension import from_panda
# Attach it to the loader class so that it can later be invoked as
# TextClassifierDataLoader.from_panda(...).
TextClassifierDataLoader.from_panda = from_panda

# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# NOTE(review): %reload_ext directly after %load_ext looks redundant -- confirm
# whether it is still needed.
%reload_ext autoreload

##  Load Dataset

In [3]:
# Read the raw reviews; the path is relative to the notebook's working directory.
rev_frame = pd.read_csv("Reviews.csv")

In [4]:
# `df` is the working copy processed for sentiment analysis; the raw
# `rev_frame` is left untouched.
df=rev_frame.copy()
# Preview the first rows.
df.head()

Unnamed: 0,Id,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...


## Data cleaning and preprocessing

Keep only the 'Text' and 'Score' columns for the analysis and rename them to 'review' and 'rating'.

In [5]:
# Keep only the review text and its score, renamed to 'review' and 'rating'.
# Selecting from the untouched `rev_frame` and renaming in one expression
# builds a fresh frame: no column assignment on a slice (which triggers
# SettingWithCopyWarning), no in-place drop, and the cell can be re-run
# without a KeyError on the already-removed 'Text'/'Score' columns.
df = rev_frame[['Text', 'Score']].rename(columns={'Text': 'review', 'Score': 'rating'})

In [6]:
# Print the (rows, columns) of the reduced frame, then preview its first rows.
print(df.shape)
df.head()

(568454, 2)


Unnamed: 0,review,rating
0,I have bought several of the Vitality canned d...,5
1,Product arrived labeled as Jumbo Salted Peanut...,1
2,This is a confection that has been around a fe...,4
3,If you are looking for the secret ingredient i...,2
4,Great taffy at a great price. There was a wid...,5


Check for null values:

In [7]:
# Count missing values per column: the 'rating' count is printed, the
# 'review' count is shown as the cell's result.
print(df['rating'].isnull().sum())
df['review'].isnull().sum()  # no null values.

0


0

Remove duplicates:

In [8]:
# Collapse rows that repeat the same (rating, review) pair, retaining only the
# first occurrence of each.
df = df.drop_duplicates(subset=['rating', 'review'], keep='first')

In [9]:
# Check the shape again: the row count has dropped, which shows that the data
# did contain duplicate rows.
size = df.shape
print(size)
df.head()


(393675, 2)


Unnamed: 0,review,rating
0,I have bought several of the Vitality canned d...,5
1,Product arrived labeled as Jumbo Salted Peanut...,1
2,This is a confection that has been around a fe...,4
3,If you are looking for the secret ingredient i...,2
4,Great taffy at a great price. There was a wid...,5


In [10]:
def mark_sentiment(rating, threshold=3):
    """Map a numeric review rating to a binary sentiment label.

    Args:
        rating: Review score to classify.
        threshold: Highest rating that is still labelled negative. Defaults
            to 3, which reproduces the original hard-coded cut-off.

    Returns:
        0 when ``rating <= threshold``, otherwise 1.
    """
    return 0 if rating <= threshold else 1

In [11]:
# Derive the binary 'sentiment' column by applying mark_sentiment to every rating.
df['sentiment']=df['rating'].apply(mark_sentiment)

In [12]:
# The numeric rating is no longer needed once the label exists; keep only the
# review text and its sentiment, then preview the result.
df = df.drop(columns=['rating'])
df.head()

Unnamed: 0,review,sentiment
0,I have bought several of the Vitality canned d...,1
1,Product arrived labeled as Jumbo Salted Peanut...,0
2,This is a confection that has been around a fe...,1
3,If you are looking for the secret ingredient i...,0
4,Great taffy at a great price. There was a wid...,1


In [13]:
# Dimensions of the labelled frame.
df.shape

(393675, 2)

## Cleaning

Note: cleaning the review text has not proven to be more successful, so the function below is defined but not applied to the data.

In [14]:
# Helpers to clean and pre-process the review text.
import re
from functools import lru_cache


@lru_cache(maxsize=1)
def _review_cleaning_resources():
    """Build the lemmatizer and the English stop-word set once, then reuse them."""
    # Imported lazily so the rest of the notebook does not require NLTK as long
    # as clean_reviews is never called. The NLTK corpora these objects rely on
    # presumably have to be downloaded beforehand -- confirm.
    from nltk.corpus import stopwords
    from nltk.stem import WordNetLemmatizer

    return WordNetLemmatizer(), frozenset(stopwords.words("english"))


def clean_reviews(review):
    """Return a normalised version of a single review text.

    The text is stripped of HTML markup, every character that is not an ASCII
    letter is replaced by a space, the result is lower-cased and split on
    whitespace, English stop words are removed, and the remaining tokens are
    lemmatized and joined with single spaces.

    Args:
        review: Raw review text, possibly containing HTML.

    Returns:
        The cleaned review as one space-separated string.
    """
    # Local import: this name was previously used without being imported
    # anywhere in the notebook. The "lxml" parser must be installed as well.
    from bs4 import BeautifulSoup

    # Shared across calls instead of being rebuilt for every review.
    lemmatizer, stop_words = _review_cleaning_resources()

    # 1. Removing html tags
    review_text = BeautifulSoup(review, "lxml").get_text()

    # 2. Retaining only alphabets.
    review_text = re.sub("[^a-zA-Z]", " ", review_text)

    # 3. Converting to lower case and splitting
    word_tokens = review_text.lower().split()

    # 4. Remove stopwords and lemmatize the remaining tokens
    word_tokens = [lemmatizer.lemmatize(w) for w in word_tokens if w not in stop_words]

    return " ".join(word_tokens)

## Ignore!

Note that preprocessing all the reviews takes far too much time, so only 100K reviews are used. To balance the classes, an equal number of reviews (50K) is taken for each sentiment.

In [15]:
# Balanced subset: the first 50,000 rows labelled positive and the first
# 50,000 rows labelled negative.
pos_df = df[df['sentiment'] == 1].iloc[:50000]
neg_df = df[df['sentiment'] == 0].iloc[:50000]

In [16]:
# Combine the two class subsets into a single frame with a fresh index.
df = pd.concat([pos_df, neg_df], ignore_index=True)
# Shuffle the rows. The fixed random_state makes the shuffle reproducible
# across kernel restarts; without it every run produced a different order.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
df.head()

Unnamed: 0,review,sentiment
0,hello found betetr tasteing popcorn with butte...,0
1,"I love the Cranberry Flax version of these, bu...",0
2,This is the best Black Olive Paste I ever had....,1
3,Firm olives but a little too salty for my tast...,0
4,This one and the Miso noodle bow are favorites...,1


## Model creation

In the following steps, the pandas DataFrame is loaded into an object that TensorFlow can process.

In [21]:
# Wrap the DataFrame with the patched-in from_panda loader and split the
# result into a training part and a test part.
train_data, test_data = TextClassifierDataLoader.from_panda(df).split(0.7) # The fraction describes the size of the training data

Let's have a look at the data, with each review and its classification:

In [22]:
# Show the first ten training examples as "<label>: <raw review bytes>".
for text, label in train_data.dataset.take(10):
    label_name = train_data.index_to_label[label.numpy()]
    print("{}: {}".format(label_name, text.numpy()))

0: b"I was extremely disappionted when I opened my shipment of Enjoy Life On The Go Bars (Very Berry, Carmel Apple, and Cocoa Loco). The boxes are now a plain, generic design..you know, the kind all brands change to when they want to make their products look cheaper. I then compared the ingredients of the new shipment with the older boxes I still had at home. Guess what?! Cheaper ingredients! Next, I taste-tested, and let my 3 year old help. He wouldn't eat the new ones...and I can't blame him. They are oily and bland. I thought Enjoy Life was a brand that actually cared about using quality ingredients to make a good-tasting product. I was wrong. In summary, these are way too expensive to be made with low-quality, unpalatable ingredients."
1: b'My dog would do anything for his liver treats!  ...and whenever any of his playmates are out for a walk they drag their owners to our house so they can have a liver treat as well (in fact most of their owners now buy it for them).  It is healthy

In [23]:
# Train the text classifier on the training split for 3 epochs.
# NOTE(review): test_ratio=0 presumably tells create() not to hold out its own
# test set, since a test split was already made above -- confirm against the
# text_classifier.create documentation.
model = text_classifier.create(train_data, epochs=3,test_ratio=0)

INFO:tensorflow:Retraining the models...
Train for 2531 steps, validate for 281 steps
Epoch 1/3
Epoch 2/3
Epoch 3/3


We can have a look at the specifics of the model:

In [None]:
# Show the model summary. NOTE(review): this cell has no execution count, so
# it was not run in the saved notebook.
model.summary()

After training we should test our model on some unseen test data to evaluate our model:

In [24]:
# Evaluate the trained model on the test split; the two returned values are
# kept as `loss` and `acc` (presumably loss and accuracy -- confirm).
loss, acc = model.evaluate(test_data)



Finally we can export the model to use it in the App using TensorFlow Lite :)

In [25]:
# Export the TFLite model together with its label file and vocabulary file
# (all three paths are relative to the working directory).
model.export('amazon_review_classifier_filter.tflite', 'text_label_n2.txt', 'vocab_n2.txt')

INFO:tensorflow:Export to tflite model amazon_review_classifier_filter.tflite, saved labels in text_label_n2.txt.
INFO:tensorflow:  Saved vocabulary in vocab_n2.txt.
