## Model 1 Demo: urgency detection using pretrained GloVe embeddings + an LSTM model

### Run this to set up the demo

In [3]:
#unzip model 
!unzip /content/glove_lstm_model.zip -d glove_lstm_model

Archive:  /content/glove_lstm_model.zip
   creating: glove_lstm_model/model/
  inflating: glove_lstm_model/__MACOSX/._model  
   creating: glove_lstm_model/model/variables/
  inflating: glove_lstm_model/__MACOSX/model/._variables  
  inflating: glove_lstm_model/model/saved_model.pb  
  inflating: glove_lstm_model/__MACOSX/model/._saved_model.pb  
   creating: glove_lstm_model/model/assets/
  inflating: glove_lstm_model/__MACOSX/model/._assets  
  inflating: glove_lstm_model/model/variables/variables.data-00000-of-00001  
  inflating: glove_lstm_model/__MACOSX/model/variables/._variables.data-00000-of-00001  
  inflating: glove_lstm_model/model/variables/variables.index  
  inflating: glove_lstm_model/__MACOSX/model/variables/._variables.index  


In [4]:
# Restore the Model 1 network from the SavedModel directory extracted by the unzip cell.
# NOTE: the Model 2 and Model 3 setup cells further down rebind `model`; re-run this
# cell before the Model 1 demo if either of them has been executed since.
import tensorflow as tf
from tensorflow import keras

model = tf.keras.models.load_model('glove_lstm_model/model')



In [29]:
# Rebuild the Model 1 vocabulary: read the sentences from the Google Sheets CSV export,
# clean them, and fit a Keras tokenizer on the cleaned word lists.
import string

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

df = pd.read_csv("Urgency Sentences - Sheet1.csv", encoding="utf-8")

# NLTK resources required by word_tokenize and stopwords.words
nltk.download('punkt')
nltk.download('stopwords')

# Blank cells are read as NaN, which word_tokenize cannot process, so drop them.
lines = df['Sentence'].dropna().astype(str).tolist()

# Neither of these changes between sentences: build them once, not on every iteration.
table = str.maketrans('', '', string.punctuation)
stop_words = set(stopwords.words('english'))

sentences = list()
for line in lines:
    # split into tokens and convert to lower case
    tokens = [w.lower() for w in word_tokenize(line)]
    # remove punctuation from each word
    stripped = [w.translate(table) for w in tokens]
    # keep only alphabetic tokens that are not stop words
    words = [w for w in stripped if w.isalpha() and w not in stop_words]
    sentences.append(words)

# length the demo cell pads/truncates every sequence to
max_length = 100

# Build the word -> integer index vocabulary from the cleaned corpus
# (the actual vectorization happens later via texts_to_sequences).
# NOTE(review): the vocabulary is refit from the CSV here instead of being loaded from a
# saved tokenizer, so its indices match the loaded model only if training used this same
# CSV and this same cleaning -- confirm.
tokenizer_obj = Tokenizer()
tokenizer_obj.fit_on_texts(sentences)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Demo

In [31]:
test_phrase = "Six people are in dire need of food." #@param {type:"string"}

# Clean the phrase with the same steps the setup cell applied to the corpus:
# tokenize, lowercase, strip punctuation, keep alphabetic non-stop-words.
table = str.maketrans('', '', string.punctuation)
stop_words = set(stopwords.words('english'))
tokens = [token.lower() for token in word_tokenize(test_phrase)]
stripped = [token.translate(table) for token in tokens]
words = [token for token in stripped if token.isalpha() and token not in stop_words]

# Encode with the fitted vocabulary and pad to the fixed sequence length.
sequences = tokenizer_obj.texts_to_sequences([words])
review_pad = pad_sequences(sequences, maxlen=max_length)

print('Sentence being analyzed by the model: ', test_phrase)
# [0][0] picks the single score for the single phrase; int() truncates to a whole percent.
print(
    'Model confidence that this sentence conveys urgency: {}%'.format(
        int(model.predict(review_pad)[0][0]*100)
    )
)

Sentence being analyzed by the model:  Six people are in dire need of food.
Model confidence that this sentence conveys urgency: 99%


## Model 2 Demo: Sentiment Analysis using trained Word2Vec embeddings and GRU

---



### Run this to set up the demo

In [17]:
# Restore the Model 2 network and its pickled tokenizer from disk.
# NOTE: this rebinds `model`, replacing whichever network an earlier setup cell loaded.
import pickle
import tensorflow as tf
from tensorflow import keras

model_output = 'sentiment-model.h5'
tokenizer_output = 'tokenizer.pickle'

model = tf.keras.models.load_model(model_output)

# pickle.load can execute code embedded in the file -- only open pickles you created or trust.
with open(tokenizer_output, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

### Demo

In [19]:
# Interactive Model 2 demo: prompt for a phrase and print the model's positivity score as a percentage.
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

nltk.download("stopwords")
nltk.download("punkt")
nltk.download("wordnet")

sentence = input("Enter in the phrase you would like to determine positivity/negativity for:")

# Drop every character that is neither a word character nor whitespace (i.e. punctuation).
sentence = re.sub(r'[^\w\s]', '', sentence)

# Tokenize and lowercase in one pass.
words = [token.lower() for token in word_tokenize(sentence)]

# Discard English stop words.
stop_words = set(stopwords.words('english'))
words = [token for token in words if token not in stop_words]

# texts_to_sequences expects a list of texts, so wrap the single word list.
cleaned_sentence = [words]

sequences = tokenizer.texts_to_sequences(cleaned_sentence)
review_pad = pad_sequences(sequences, maxlen=100)

# [0][0] picks the single score for the single phrase; int() truncates to a whole percent.
print(
    'Model confidence that this sentence conveys positivity: {}%'.format(
        int(model.predict(review_pad)[0][0]*100)
    )
)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Enter in the phrase you would like to determine positivity/negativity for:I am so sorry
Model confidence that this sentence conveys positivity: 9%


## Model 3 Demo: Urgency Detection using pretrained FastText embeddings and LSTM



### Run this to set up the demo

In [20]:
# Restore the Model 3 network and its pickled tokenizer from disk.
# NOTE: this rebinds `model` and `tokenizer`, replacing the objects the Model 2 setup cell loaded.
import pickle
import tensorflow as tf
from tensorflow import keras

model_output = 'urgency-model.h5'
tokenizer_output = 'urgency-tokenizer.pickle'

model = tf.keras.models.load_model(model_output)

# pickle.load can execute code embedded in the file -- only open pickles you created or trust.
with open(tokenizer_output, 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

### Demo

In [24]:
# Interactive Model 3 demo: prompt for a phrase and print the model's urgency score as a percentage.
import re
import nltk
from nltk.corpus import stopwords
nltk.download("stopwords")
nltk.download("punkt")
nltk.download("wordnet")
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.stem import WordNetLemmatizer

sentence = input("Enter in the phrase you would like to determine urgency for:")

# removes punctuation in the sentence (anything that is not a word character or whitespace)
sentence = re.sub(r'[^\w\s]', '', sentence)
# gets all of the words in the sentence
words = word_tokenize(sentence)
# converts all the words to lowercase
words = [w.lower() for w in words]
# converts each word to its lemma; one lemmatizer is shared instead of building one per word
lemmatizer = WordNetLemmatizer()
words = [lemmatizer.lemmatize(w) for w in words]
# retrieves a set of all of the stopwords in English
stop_words = set(stopwords.words('english'))
# removes the stopwords in the sentence
words = [w for w in words if w not in stop_words]

# texts_to_sequences expects a list of texts, so wrap the single word list
cleaned_sentence = [words]

sequences = tokenizer.texts_to_sequences(cleaned_sentence)
review_pad = pad_sequences(sequences, maxlen=100)

# [0][0] picks the single score for the single phrase; int() truncates to a whole percent
print('Model confidence that this sentence conveys urgency: {}%'.format(int(model.predict(review_pad)[0][0]*100)))

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Enter in the phrase you would like to determine urgency for:You will miss the train
Model confidence that this sentence conveys urgency: 40%
