# Getting Started

This section sets up the environment for access to the Universal Sentence Encoder on TF Hub and provides examples of applying the encoder to words, sentences, and paragraphs.

In [0]:
# Install the latest Tensorflow version.
!pip3 install --quiet "tensorflow>=1.7"
# Install TF-Hub.
!pip3 install --quiet tensorflow-hub
!pip3 install seaborn



More detailed information about installing TensorFlow can be found at [https://www.tensorflow.org/install/](https://www.tensorflow.org/install/).

In [0]:
# Mount Google Drive at /content/drive so the data files referenced below via
# './drive/My Drive/...' can be read and written.
# NOTE(review): those relative paths assume the working directory is /content — confirm.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
import keras.layers as layers
from keras.models import Model
from keras import backend as K
# Seed NumPy's global RNG. No TensorFlow-level seed is set in this cell.
np.random.seed(10)

In [0]:
# TF-Hub handle of the Universal Sentence Encoder variant to load; the trailing
# #@param annotation lists the two selectable handles.
module_url = "https://tfhub.dev/google/universal-sentence-encoder-large/3" #@param ["https://tfhub.dev/google/universal-sentence-encoder/2", "https://tfhub.dev/google/universal-sentence-encoder-large/3"]
# Instantiate the hub module; it is called later inside UniversalEmbedding.
embed = hub.Module(module_url)

In [0]:
# Read the width of the module's "default" output (second dimension of its
# declared shape); used as the Lambda layer's output size.
default_output_info = embed.get_output_info_dict()['default']
embed_size = default_output_info.get_shape()[1].value
embed_size

512

In [0]:
# Load the training sentences and their sentiment labels.
# NOTE(review): read_pickle can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
df_train = pd.read_pickle("./drive/My Drive/cleansed_organic/cleansed_fine_tune_organic_full_train.pkl")
# Store the labels as a categorical so the class list (and its order) can be
# read back later from `.cat.categories`.
df_train['Sentiment'] = df_train['Sentiment'].astype('category')

In [0]:
# Preview the first rows of the training frame.
df_train.head()

Unnamed: 0,Sentence,Sentiment
6,'Organic' agriculture goes along with that the...,0
48,Organic farming is the only solution to get ri...,p
52,So organic farming is the best alternative in ...,p
54,"But the argument is, it cannot satiate needs o...",p
55,"Though the argument is valid,scientists proved...",p


In [0]:
# Number of distinct sentiment classes found in the training labels.
sentiment_categories = df_train.Sentiment.cat.categories
category_counts = len(sentiment_categories)
category_counts

3

## Wrap embed module in a Lambda layer
Explicitly cast the input to a string.

In [0]:
def UniversalEmbedding(x):
    """Embed a batch of sentences with the TF-Hub module bound to `embed`.

    Intended to be wrapped in a Keras ``Lambda`` layer.

    Args:
        x: Tensor of shape (batch, 1) holding one sentence per row, as
            produced by the string-typed Keras ``Input(shape=(1,))``.

    Returns:
        The module's "default" output for the batch: one embedding vector
        per input sentence.
    """
    # Cast explicitly so the module always receives a string tensor.
    sentences = tf.cast(x, tf.string)
    # Drop only the trailing singleton axis. A bare tf.squeeze() would also
    # drop the batch axis whenever a batch holds a single sentence, handing
    # the module a scalar instead of a 1-D batch.
    sentences = tf.squeeze(sentences, axis=1)
    return embed(sentences, signature="default", as_dict=True)["default"]

In [0]:
# Raw sentences enter the network as a (batch, 1) string tensor.
input_text = layers.Input(shape=(1,), dtype=tf.string)

# Sentence encoder wrapped as a Keras layer; emits embed_size features per sentence.
embedding_layer = layers.Lambda(UniversalEmbedding, output_shape=(embed_size,))
embedding = embedding_layer(input_text)

# Classification head: one hidden ReLU layer followed by a softmax with one
# unit per sentiment category.
hidden_layer = layers.Dense(256, activation='relu')
dense = hidden_layer(embedding)
output_layer = layers.Dense(category_counts, activation='softmax')
pred = output_layer(dense)

model = Model(inputs=[input_text], outputs=pred)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 1)                 0         
_________________________________________________________________
lambda_2 (Lambda)            (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 771       
Total params: 132,099
Trainable params: 132,099
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Model inputs: one sentence per row, as an object array of shape (n_samples, 1).
train_sentences = df_train['Sentence'].tolist()
train_text = np.array(train_sentences, dtype=object).reshape(-1, 1)

# Targets: one-hot rows, one column per sentiment category.
train_label = pd.get_dummies(df_train.Sentiment).values.astype(np.int8)

In [0]:
# Sanity check: expect (n_samples, 1).
train_text.shape

(3595, 1)

In [0]:
# Sanity check: expect (n_samples, n_categories).
train_label.shape

(3595, 3)

In [0]:
# Peek at the first three one-hot label rows.
train_label[:3]

array([[1, 0, 0],
       [0, 0, 1],
       [0, 0, 1]], dtype=int8)

In [0]:
# Load the held-out validation sentences and labels.
# NOTE(review): read_pickle can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
df_test = pd.read_pickle("./drive/My Drive/cleansed_organic/cleansed_fine_tune_organic_full_validation.pkl")
# Reuse the training categorical dtype instead of inferring categories from
# this frame alone: one-hot columns derived from it then have the same set and
# order as the training labels even if a class is absent here. Labels that do
# not occur in the training categories become NaN.
df_test['Sentiment'] = df_test['Sentiment'].astype(df_train.Sentiment.dtype)

In [0]:
# Validation inputs: one sentence per row, as an object array of shape (n_samples, 1).
test_sentences = df_test['Sentence'].tolist()
test_text = np.array(test_sentences, dtype=object).reshape(-1, 1)
# Validation targets: one-hot rows, one column per sentiment category.
test_label = pd.get_dummies(df_test.Sentiment).values.astype(np.int8)

In [0]:
# Sanity check: expect (n_samples, 1).
test_text.shape

(317, 1)

In [0]:
# Sanity check: expect (n_samples, n_categories), matching the training labels' width.
test_label.shape

(317, 3)

## Train Keras model and save weights
This trains and saves only the weights of our Keras layers, not the embed module's weights.

In [0]:
# Training run 1: 10 epochs with mini-batches of 32; weights go to model.h5.
with tf.Session() as session:
  # Make Keras execute inside this session.
  K.set_session(session)
  # Initialise variables and lookup tables in the new session before fitting.
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  history = model.fit(
      x=train_text,
      y=train_label,
      batch_size=32,
      epochs=10,
      validation_data=(test_text, test_label),
  )
  # Save before the session is closed.
  model.save_weights('./drive/My Drive/cleansed_organic/model.h5')

W0821 21:17:09.035500 139682656565120 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0821 21:17:09.086400 139682656565120 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Train on 3595 samples, validate on 317 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [0]:
# Training run 2: 20 epochs with mini-batches of 64; weights go to model2.h5.
with tf.Session() as session:
  # Make Keras execute inside this session.
  K.set_session(session)
  # Running the initialisers in a new session resets the variables, so this
  # run does not continue from an earlier fit.
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  history = model.fit(
      x=train_text,
      y=train_label,
      batch_size=64,
      epochs=20,
      validation_data=(test_text, test_label),
  )
  # Save before the session is closed.
  model.save_weights('./drive/My Drive/cleansed_organic/model2.h5')

Train on 3595 samples, validate on 317 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
# Training run 3: 20 epochs with mini-batches of 128; weights go to model3.h5.
with tf.Session() as session:
  # Make Keras execute inside this session.
  K.set_session(session)
  # Running the initialisers in a new session resets the variables, so this
  # run does not continue from an earlier fit.
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  history = model.fit(
      x=train_text,
      y=train_label,
      batch_size=128,
      epochs=20,
      validation_data=(test_text, test_label),
  )
  # Save before the session is closed.
  model.save_weights('./drive/My Drive/cleansed_organic/model3.h5')

Train on 3595 samples, validate on 317 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
# Training run 4: 13 epochs with mini-batches of 128; weights go to model4.h5,
# the file the prediction cells load.
with tf.Session() as session:
  # Make Keras execute inside this session.
  K.set_session(session)
  # Running the initialisers in a new session resets the variables, so this
  # run does not continue from an earlier fit.
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  history = model.fit(
      x=train_text,
      y=train_label,
      batch_size=128,
      epochs=13,
      validation_data=(test_text, test_label),
  )
  # Save before the session is closed.
  model.save_weights('./drive/My Drive/cleansed_organic/model4.h5')

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where








Train on 3595 samples, validate on 317 samples
Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


## Make predictions

In [0]:
# Predict a sentiment label for every "positive" belief statement and store it
# in a new column next to the statement.
belief_pd = pd.read_excel('./drive/My Drive/cleansed_organic/Belief_statements_output.xlsx')

# One statement per row, as an (n_statements, 1) object array like the training inputs.
new_text = np.array(
    belief_pd['Output_fine_tune_organic_products_positive'].tolist(), dtype=object
)[:, np.newaxis]

with tf.Session() as session:
  K.set_session(session)
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  # Load the trained weights after the initialisers have run so they are not reset.
  model.load_weights('./drive/My Drive/cleansed_organic/model4.h5')
  predicts = model.predict(new_text, batch_size=32)

# The softmax columns follow the order of the training label categories, so
# the argmax (a class index, despite the variable name) maps straight into them.
categories = df_train.Sentiment.cat.categories.tolist()
predict_logits = predicts.argmax(axis=1)
predict_labels = [categories[class_idx] for class_idx in predict_logits]

# Assign the whole column by position; indexing the list with the DataFrame's
# index labels is only correct for a default 0..n-1 index.
belief_pd['Output_fine_tune_organic_products_positive_prediction'] = predict_labels
  

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fc4564af0b8>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')


In [0]:
# Predict a sentiment label for every "negative" belief statement. belief_pd is
# reused rather than re-read from Excel, so columns added by earlier cells stay.

# One statement per row, as an (n_statements, 1) object array like the training inputs.
new_text = np.array(
    belief_pd['Output_fine_tune_organic_products_negative'].tolist(), dtype=object
)[:, np.newaxis]

with tf.Session() as session:
  K.set_session(session)
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  # Load the trained weights after the initialisers have run so they are not reset.
  model.load_weights('./drive/My Drive/cleansed_organic/model4.h5')
  predicts = model.predict(new_text, batch_size=32)

# The softmax columns follow the order of the training label categories, so
# the argmax (a class index, despite the variable name) maps straight into them.
categories = df_train.Sentiment.cat.categories.tolist()
predict_logits = predicts.argmax(axis=1)
predict_labels = [categories[class_idx] for class_idx in predict_logits]

# Assign the whole column by position; indexing the list with the DataFrame's
# index labels is only correct for a default 0..n-1 index.
belief_pd['Output_fine_tune_organic_products_negative_prediction'] = predict_labels
  

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fc408925ba8>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')


In [0]:
# Predict a sentiment label for every "neutral" belief statement. belief_pd is
# reused rather than re-read from Excel, so columns added by earlier cells stay.

# One statement per row, as an (n_statements, 1) object array like the training inputs.
new_text = np.array(
    belief_pd['Output_fine_tune_organic_products_neutral'].tolist(), dtype=object
)[:, np.newaxis]

with tf.Session() as session:
  K.set_session(session)
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  # Load the trained weights after the initialisers have run so they are not reset.
  model.load_weights('./drive/My Drive/cleansed_organic/model4.h5')
  predicts = model.predict(new_text, batch_size=32)

# The softmax columns follow the order of the training label categories, so
# the argmax (a class index, despite the variable name) maps straight into them.
categories = df_train.Sentiment.cat.categories.tolist()
predict_logits = predicts.argmax(axis=1)
predict_labels = [categories[class_idx] for class_idx in predict_logits]

# Assign the whole column by position; indexing the list with the DataFrame's
# index labels is only correct for a default 0..n-1 index.
belief_pd['Output_fine_tune_organic_products_neutral_prediction'] = predict_labels
  

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fc40893db38>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')


In [0]:
# Write the annotated sheet without the index column. The `encoding` keyword is
# omitted: to_excel never used it for .xlsx output and newer pandas rejects it.
# NOTE(review): the file name says "new64_20", but the prediction cells load
# model4.h5 (the batch_size=128, epochs=13 run) — confirm which weights this
# export is meant to reflect.
belief_pd.to_excel('./drive/My Drive/cleansed_organic/Belief_statements_output_final_new64_20.xlsx', index=False, header=True)

In [0]:
# Predict a sentiment label for each of the three "full" belief-statement
# columns (positive, negative, neutral) and export the annotated sheet.
belief_pd = pd.read_excel('./drive/My Drive/cleansed_organic/Belief_statements_output.xlsx')

# The softmax columns follow the order of the training label categories.
categories = df_train.Sentiment.cat.categories.tolist()

for sentiment in ('positive', 'negative', 'neutral'):
  source_column = 'Output_fine_tune_organic_full_' + sentiment
  # Derive the target name from the source column so the column that is
  # created is always the one that gets filled (no stray empty column).
  prediction_column = source_column + '_prediction'

  # One statement per row, as an (n_statements, 1) object array like the training inputs.
  new_text = np.array(belief_pd[source_column].tolist(), dtype=object)[:, np.newaxis]

  with tf.Session() as session:
    K.set_session(session)
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    # Load the trained weights after the initialisers have run so they are not reset.
    model.load_weights('./drive/My Drive/cleansed_organic/model4.h5')
    predicts = model.predict(new_text, batch_size=32)

  # argmax yields class indices (despite the variable name); map them to labels.
  predict_logits = predicts.argmax(axis=1)
  predict_labels = [categories[class_idx] for class_idx in predict_logits]

  # Assign the whole column by position; indexing the list with the DataFrame's
  # index labels is only correct for a default 0..n-1 index.
  belief_pd[prediction_column] = predict_labels

# Write the annotated sheet without the index column. The `encoding` keyword is
# omitted: to_excel never used it for .xlsx output and newer pandas rejects it.
belief_pd.to_excel('./drive/My Drive/cleansed_organic/Belief_statements_output_full_final_new128_13.xlsx', index=False, header=True)

Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fc406a93518>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')
Exception ignored in: <bound method BaseSession._Callable.__del__ of <tensorflow.python.client.session.BaseSession._Callable object at 0x7fc406a93358>>
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1473, in __del__
    self._session._session, self._handle)
tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.')
