In [1]:
# Imports Training Data
import sys
sys.path.append('../lambda/data')

from tweet_ner_data_label import train_data

In [2]:
# Imports & Dependencies
from __future__ import unicode_literals, print_function

import random
import warnings
from pathlib import Path
import spacy
from spacy.util import minibatch, compounding


In [3]:
print(spacy.__version__)

2.3.5


In [4]:
# Update original data to include Normal NER Tags to prevent forgetting problem
nlp = spacy.load('en_core_web_md')

In [5]:
# Checks entities if multiple truck locations
def count_locations(ents_arr):
    count = 0
    
    for i in range(0,len(ents_arr)):
        cur_ent_type = ents_arr[i][2]
        if cur_ent_type == 'TRUCK_LOCATION':
            count += 1
    
    return count

#MODIFY THIS
# NO OVERLAPS WITH TRUCK DATA
    
# Adds old NER tags to train data
def append_old_ner(train_data):
    for i in range(0,len(train_data)):
        doc = nlp(train_data[i][0])
        ents_arr = train_data[i][1]['entities']
        locations_count = count_locations(ents_arr)

        for ent in doc.ents:
            start = int(ent.start_char)
            end = int(ent.end_char)

            # Cases
            # < Start---End > Bound entire word and more
            case1 = lambda truck_start, truck_end: start <= truck_start and end >= truck_end
            # < Start>---End< Bound entire left up to end
            case2 = lambda truck_start, truck_end: start <= truck_start and end <= truck_end and end >= truck_start
            # >Start---<End > Bound entire right up to start
            case3 = lambda truck_start, truck_end: end >= truck_end and start >= truck_start and start <= truck_end
            
            if locations_count == 0:
                ents_arr.append((start, end, ent.label_))
            else:
                continue_outer = False
                for j in range(0,locations_count):
                    truck_start = int(ents_arr[j][0])
                    truck_end = int(ents_arr[j][1])
                    
                    if case1(truck_start, truck_end) or case2(truck_start, truck_end) or case3(truck_start, truck_end):
                        continue_outer = True
                        break
                
                if continue_outer:
                    continue
                else:
                    ents_arr.append((start, end, ent.label_))
                    
    return train_data

train_data = append_old_ner(train_data)

In [6]:
# Validate old NER Tags
print(train_data[105])

('#tour de fat today. Come down to Yards park and try your favorite Cuban speciality paired with New Belgium beer.', {'entities': [(33, 43, 'TRUCK_LOCATION'), (0, 12, 'PERSON'), (13, 18, 'DATE'), (66, 71, 'NORP'), (95, 106, 'GPE')]})


In [9]:
# Load model if exists
def load_model(model=None):
    """Load the model, set up the pipeline and train the entity recognizer."""
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")
    
    return nlp

# List of Names Entities
# To be referenced later on
move_names = []

# Train update Spacy's NER
def train_NER(train_data, iterations, model=None):
    TRAIN_DATA = train_data
    
    # Set language to only Text in tweets
    nlp = load_model(model)
    
    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
    # otherwise, get it so we can add labels
    else:
        ner = nlp.get_pipe('ner')
        
    # add new labels eg: using TRUCK_LOCATION
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get("entities"):
            print(ent)
            ner.add_label(ent[2])
            
    # get names of other pipes to disable them during training
    pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    
    # only train NER
    with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
        # show warnings for misaligned entity spans once
        warnings.filterwarnings("once", category=UserWarning, module='spacy')

        # reset and initialize the weights randomly – but only if we're
        # training a new model
        if model is None:
            nlp.begin_training()
        for itn in range(iterations):
            random.shuffle(TRAIN_DATA)
            losses = {}
            # batch up the examples using spaCy's minibatch
            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(
                    texts,  # batch of texts
                    annotations,  # batch of annotations
                    drop=0.2,  # dropout - make it harder to memorise data
                    losses=losses,
                )
            print("Losses", losses)
            
    return nlp

In [10]:
# Train Data
nlp = train_NER(train_data, 5)

Created blank 'en' model
(10, 24, 'TRUCK_LOCATION')
(25, 30, 'DATE')
(39, 51, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(52, 57, 'DATE')
(11, 14, 'TRUCK_LOCATION')
(15, 20, 'DATE')
(6, 18, 'DATE')
(69, 81, 'TRUCK_LOCATION')
(0, 15, 'ORG')
(60, 65, 'DATE')
(94, 112, 'PRODUCT')
(27, 34, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(38, 47, 'DATE')
(3, 7, 'TRUCK_LOCATION')
(0, 2, 'CARDINAL')
(0, 8, 'TRUCK_LOCATION')
(21, 26, 'DATE')
(38, 47, 'TIME')
(98, 108, 'MONEY')
(6, 14, 'DATE')
(13, 29, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(42, 47, 'DATE')
(9, 17, 'TRUCK_LOCATION')
(6, 8, 'MONEY')
(10, 22, 'TRUCK_LOCATION')
(3, 17, 'TRUCK_LOCATION')
(20, 28, 'TRUCK_LOCATION')
(29, 34, 'DATE')
(17, 25, 'TRUCK_LOCATION')
(34, 46, 'TRUCK_LOCATION')
(22, 30, 'DATE')
(13, 21, 'TRUCK_LOCATION')
(23, 27, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(59, 61, 'GPE')
(9, 24, 'TRUCK_LOCATION')
(59, 66, 'DATE')
(11, 22, 'TRUCK_LOCATION')
(23, 28, 'DATE')
(13, 16, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(17, 27, 'PERSON')
(60, 62, 'CARDINAL')
(24, 38

(17, 25, 'DATE')
(35, 57, 'DATE')
(62, 65, 'CARDINAL')
(100, 114, 'PERSON')
(0, 37, 'ORG')
(22, 31, 'TRUCK_LOCATION')
(32, 35, 'ORG')
(78, 82, 'WORK_OF_ART')
(0, 9, 'TRUCK_LOCATION')
(10, 15, 'DATE')
(31, 37, 'CARDINAL')
(10, 22, 'TRUCK_LOCATION')
(23, 28, 'DATE')
(86, 95, 'DATE')
(122, 138, 'NORP')
(54, 67, 'WORK_OF_ART')
(81, 90, 'TIME')
(110, 120, 'PERSON')
(1, 9, 'TRUCK_LOCATION')
(15, 22, 'TRUCK_LOCATION')
(37, 45, 'DATE')
(50, 52, 'PERSON')
(54, 56, 'PERSON')
(58, 70, 'TRUCK_LOCATION')
(79, 102, 'TRUCK_LOCATION')
(0, 7, 'TIME')
(8, 17, 'GPE')
(20, 25, 'DATE')
(30, 45, 'TRUCK_LOCATION')
(6, 12, 'DATE')
(52, 57, 'DATE')
(16, 29, 'TRUCK_LOCATION')
(0, 5, 'DATE')
(41, 50, 'QUANTITY')
(52, 60, 'PERSON')
(66, 69, 'NORP')
(0, 3, 'CARDINAL')
(4, 38, 'PERCENT')
(56, 79, 'PERSON')
(11, 23, 'TRUCK_LOCATION')
(24, 27, 'TRUCK_LOCATION')
(0, 6, 'DATE')
(28, 37, 'PERSON')
(52, 60, 'DATE')
(0, 8, 'TRUCK_LOCATION')
(9, 14, 'DATE')
(4, 13, 'ORG')
(0, 2, 'ORG')
(49, 60, 'MONEY')
(26, 34, 'TRUCK_LOC

(111, 113, 'DATE')
(14, 25, 'TRUCK_LOCATION')
(0, 6, 'GPE')
(73, 80, 'DATE')
(10, 22, 'TRUCK_LOCATION')
(56, 64, 'PERSON')
(0, 16, 'TRUCK_LOCATION')
(80, 116, 'TRUCK_LOCATION')
(11, 25, 'TRUCK_LOCATION')
(32, 35, 'TIME')
(46, 61, 'ORG')
(68, 82, 'TRUCK_LOCATION')
(94, 100, 'TRUCK_LOCATION')
(80, 116, 'TRUCK_LOCATION')
(118, 130, 'PERSON')
(135, 140, 'ORG')
(71, 76, 'DATE')
(89, 99, 'PERSON')
(0, 12, 'TRUCK_LOCATION')
(30, 38, 'TRUCK_LOCATION')
(39, 44, 'DATE')
(54, 66, 'TIME')
(91, 99, 'DATE')
(26, 34, 'TRUCK_LOCATION')
(35, 40, 'DATE')
(1, 9, 'TRUCK_LOCATION')
(114, 117, 'TRUCK_LOCATION')
(44, 61, 'MONEY')
(63, 73, 'ORG')
(87, 88, 'MONEY')
(136, 144, 'DATE')
(43, 70, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(53, 61, 'FAC')
(61, 70, 'MONEY')
(71, 83, 'MONEY')
(15, 23, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(73, 75, 'CARDINAL')
(77, 113, 'PERSON')
(46, 65, 'TRUCK_LOCATION')
(76, 116, 'TRUCK_LOCATION')
(0, 11, 'PERSON')
(33, 38, 'DATE')
(67, 68, 'ORG')
(103, 113, 'GPE')
(31, 40, 'ORG')
(42, 59, 'ORG

(84, 110, 'ORG')
(26, 34, 'TRUCK_LOCATION')
(11, 15, 'PERSON')
(22, 27, 'DATE')
(40, 41, 'ORG')
(56, 66, 'MONEY')
(67, 78, 'MONEY')
(79, 92, 'PERSON')
(19, 23, 'TRUCK_LOCATION')
(24, 46, 'TRUCK_LOCATION')
(6, 7, 'CARDINAL')
(89, 91, 'CARDINAL')
(12, 26, 'TRUCK_LOCATION')
(35, 48, 'ORG')
(40, 59, 'TRUCK_LOCATION')
(60, 101, 'TRUCK_LOCATION')
(65, 79, 'GPE')
(81, 98, 'GPE')
(18, 36, 'TRUCK_LOCATION')
(26, 38, 'TRUCK_LOCATION')
(8, 15, 'TIME')
(39, 44, 'DATE')
(61, 66, 'CARDINAL')
(11, 24, 'TRUCK_LOCATION')
(23, 33, 'TRUCK_LOCATION')
(48, 60, 'TRUCK_LOCATION')
(0, 19, 'ORG')
(61, 66, 'DATE')
(57, 64, 'DATE')
(9, 19, 'TRUCK_LOCATION')
(56, 64, 'ORG')
(66, 69, 'ORG')
(98, 103, 'DATE')
(63, 72, 'TRUCK_LOCATION')
(11, 18, 'DATE')
(0, 6, 'GPE')
(44, 48, 'PERSON')
(49, 67, 'PERSON')
(71, 74, 'TRUCK_LOCATION')
(75, 79, 'PERSON')
(37, 52, 'TIME')
(23, 53, 'TRUCK_LOCATION')
(5, 12, 'DATE')
(72, 81, 'NORP')
(0, 14, 'ORG')
(44, 51, 'GPE')
(54, 63, 'GPE')
(65, 74, 'GPE')
(27, 30, 'TRUCK_LOCATION')
(5

(39, 63, 'FAC')
(72, 77, 'ORDINAL')
(83, 94, 'ORG')
(0, 10, 'ORG')
(75, 78, 'CARDINAL')
(79, 83, 'NORP')
(100, 101, 'CARDINAL')
(0, 5, 'ORG')
(16, 28, 'PERSON')
(0, 2, 'ORG')
(106, 129, 'ORG')
(31, 34, 'CARDINAL')
(36, 39, 'CARDINAL')
(92, 97, 'CARDINAL')
(68, 82, 'TRUCK_LOCATION')
(47, 53, 'DATE')
(53, 54, 'NORP')
(61, 63, 'CARDINAL')
(85, 102, 'ORG')
(3, 11, 'TRUCK_LOCATION')
(6, 13, 'DATE')
(24, 32, 'NORP')
(102, 107, 'ORDINAL')
(0, 8, 'ORG')
(23, 36, 'ORG')
(21, 35, 'PERSON')
(9, 17, 'TRUCK_LOCATION')
(6, 8, 'CARDINAL')
(46, 49, 'PERSON')
(58, 70, 'EVENT')
(83, 91, 'ORG')
(93, 96, 'ORG')
(0, 19, 'TRUCK_LOCATION')
(31, 36, 'TIME')
(38, 43, 'DATE')
(47, 55, 'DATE')
(64, 94, 'TRUCK_LOCATION')
(5, 12, 'TIME')
(43, 48, 'DATE')
(78, 93, 'TRUCK_LOCATION')
(5, 21, 'TIME')
(36, 41, 'DATE')
(29, 36, 'PERSON')
(50, 66, 'ORG')
(0, 2, 'ORG')
(93, 114, 'PRODUCT')
(29, 61, 'TRUCK_LOCATION')
(34, 40, 'ORG')
(41, 50, 'PRODUCT')
(62, 86, 'WORK_OF_ART')
(88, 108, 'PERSON')
(0, 9, 'TRUCK_LOCATION')
(1

(88, 104, 'ORG')
(106, 139, 'ORG')
(94, 107, 'TRUCK_LOCATION')
(81, 83, 'MONEY')
(0, 18, 'TRUCK_LOCATION')
(19, 24, 'DATE')
(14, 27, 'PERSON')
(29, 39, 'PRODUCT')
(58, 66, 'PERSON')
(29, 41, 'TRUCK_LOCATION')
(0, 5, 'DATE')
(28, 41, 'TRUCK_LOCATION')
(66, 107, 'TRUCK_LOCATION')
(35, 51, 'DATE')
(94, 110, 'PERSON')
(13, 25, 'TRUCK_LOCATION')
(32, 43, 'TRUCK_LOCATION')
(46, 51, 'TIME')
(0, 11, 'PERSON')
(13, 22, 'ORG')
(36, 41, 'DATE')
(26, 30, 'TRUCK_LOCATION')
(36, 54, 'TRUCK_LOCATION')
(8, 15, 'TIME')
(55, 60, 'DATE')
(41, 54, 'TRUCK_LOCATION')
(5, 14, 'TIME')
(31, 39, 'DATE')
(60, 74, 'TIME')
(78, 86, 'NORP')
(0, 4, 'DATE')
(84, 104, 'TRUCK_LOCATION')
(8, 14, 'DATE')
(105, 110, 'DATE')
(113, 118, 'ORG')
(5, 12, 'TIME')


  **kwargs
  **kwargs
  gold = GoldParse(doc, **gold)
  gold = GoldParse(doc, **gold)
  gold = GoldParse(doc, **gold)


ValueError: [E103] Trying to set conflicting doc.ents: '(10, 40, 'TRUCK_LOCATION')' and '(15, 22, 'LOC')'. A token can only be part of one entity, so make sure the entities you're setting don't overlap.

In [12]:
# Review NER Results
def show_ents(doc):
    doc = nlp(doc[0])
    if doc.ents:
        for ent in doc.ents:
            print(ent.text + ' - ' + ent.label_)
    else:
        print('No entities found')

In [13]:
# Test across subset of tweets
# for i in range(0,10):
#     show_ents(train_data[i])
#     print("")
    
print(train_data[74])
print('')
show_ents(train_data[74])

('Good morning NoMA today open Now', {'entities': [(13, 18, 'TRUCK_LOCATION')]})

morning - TIME
NoMA - TRUCK_LOCATION
today - DATE


In [14]:
# Manually test the trained model
def test_model(nlp, test_text):
    doc = nlp(test_text)
    print("Entities in '%s' \n" % test_text )
    for ent in doc.ents:
        print(ent.label_, ent.text)
        
test_model(nlp, "Today we are at 2nd Ave Parrish")

Entities in 'Today we are at 2nd Ave Parrish' 

DATE Today
TRUCK_LOCATION 2nd Ave Parrish


In [15]:
# Save model to output directory
def save_model(nlp, output_dir, name):
    if output_dir is not None:
        output_dir = Path(output_dir)
        
        if not output_dir.exists():
            output_dir.mkdir()
            
        nlp.meta[name] = name  # rename model
        nlp.to_disk(output_dir)
        print("Saved model to", output_dir)

def test_saved_model(model_dir, test_text):
    # test the saved model
    print("Loading from", model_dir)
    nlp = spacy.load(model_dir)

    # Check the classes have loaded back consistently
    # assert nlp.get_pipe("ner").move_names == move_names
    if nlp.get_pipe('ner').move_names[0] != 'B-TRUCK_LOCATION':
        print('NER Pipe doesn\'t have Truck Location')
        return
    
    test_model(nlp, test_text)

In [17]:
save_model(nlp,'../lambda/model/tweet_ner_model','ner_1')

Saved model to ../lambda/model/tweet_ner_model


In [18]:
test_saved_model('../model/tweet_ner_model', "Today we are at 2nd Ave Parrish")

Loading from ../model/tweet_ner_model


OSError: [E050] Can't find model '../model/tweet_ner_model'. It doesn't seem to be a shortcut link, a Python package or a valid path to a data directory.

In [1]:
# Imports & Dependencies
from __future__ import unicode_literals, print_function

from numpy import load

from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import class_weight

import plac
import random
import warnings
from pathlib import Path
import spacy
from spacy.util import minibatch, compounding

# Data
from tweet_data_label import train_data

# Spacy Load
nlp = spacy.load('en')

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


693
Tweet 1
'This is my f**king country': Racist white woman arrested for attacking passengers on New York bus https://t.co/Rfm0jauh72
_____

Tweet 2
RT @mussie671: Freestyle #3, Fiasco type shiiii https://t.co/0GTJscWORW
_____

Tweet 3
What you think Cole? @JColeNC @JColeDirect https://t.co/oE4Ipvq6hL
_____

Tweet 4
⚡️ “Redskins dominate the Raiders on Sunday Night Football ”https://t.co/4VuHWOs1gX
DATE: Sunday Night 
_____

Tweet 5
RT @NWSSanJuan: 215PM FLASH FLOOD EMERGENCY for A Dam Failure in Isabela Municipality y Quebradillas Municipality in Puerto Rico... #prwx h…
NEIGHBORHOOD: Isabela Municipality NEIGHBORHOOD: Quebradillas Municipality 
TIME: 215PM 
_____

Tweet 6
Posted @chinatown on the corner of 7th &amp; D st. NW, PERFECT WEATHER TO SLIDE UP!!
ADDRESS: 7th &amp; D st. NW 

POSITIVE: Posted @chinatown on the corner of 
_____

Tweet 7
Good Morning DMV, we are @ChinaTown on the corner of 7th &amp; G st NW. Hand crafted burgers, endless toppings, Hand cut fries,  PULL UP !!!



Tweet 310
RT @truckeroo: Tomorrow is the last Truckeroo of the season! Come out to celebrate the start of an amazing weekend. See you there!Trucker…

NEGATIVE
_____

Tweet 311
We have missed ALL our fans in #Tysons speared the news we are there for lunch tomorrow #vegan #Egyptian #yummy… https://t.co/uviH5VhAL1

NEGATIVE
_____

Tweet 312
Good morning! Yellow Tin heads towards to US ICE today. See you all during lunch hour.

POSITIVE
_____

Tweet 313
Good morning Navy Yardian! Yellow Tin landed in front of Five Guys. See you all during lunch hour.

POSITIVE
_____

Tweet 314
Good morning! Yellow Tin heads towards to Navy Yard today. See you all during lunch hour.

POSITIVE
_____

Tweet 315
Good morning! Yellow Tin heads towards to Metro Center today. See you all during lunch hour.

POSITIVE
_____

Tweet 316
Good morning! Yellow Tin heads towards to US ICE today. See you all during lunch hour.

POSITIVE
_____

Tweet 317
Good morning! Yellow Tin landed at Farragut Square to serve lunch to

_____

Tweet 595
Monday we will be at Union station with fresh Bibim bap, Bul goki with Jap chae.  See you~

NEGATIVE
_____

Tweet 596
Tuesday we will be at Union station with fresh Korean food, Bibim bap, Bul goki with Jap chae. See you tomorrow at lunch time.

NEGATIVE
_____

Tweet 597
Thursday we will be at 500 12th St. SW in front of ICE with fresh Korean food for your lunch. Stay warm and see you tomorrow.

NEGATIVE
_____

Tweet 598
Good morning Clarendon! Unfortunately we can't make it today, parking is full.

NEGATIVE
_____

Tweet 599
Franklin square Thursday. Can't wait to serve fresh Korean food to you then. See you~

NEGATIVE
_____

Tweet 600
Union station Satuday. Can't find parking today. Open window at 11 tomorrow. See you~

NEGATIVE
_____

Tweet 601
Friday we will be at L'Enfant plaza with fresh and delicious Korean food. You need some hot and spicy food for this cold week. See you~

NEGATIVE
_____

Tweet 602
Thursday we will be at Franklin square with fresh Bibim bap, Bu

In [22]:
class DataTokenizer:
    def __init__(self, train_data):
        self.tokenizer = Tokenizer()
        self.train_data = train_data

        # Sets up tokenizer
        try:
            self.__init_tokenizer()
        except ModuleNotFoundError:
            raise

    # Clean data
    def __tweet_clean(self, text):
        lower = []
        text = text.replace('&amp;', 'and')

        for token in nlp(text):
            lower.append(token.text.lower())

        return lower
            
    # Orginize Tweet Data for tokenizer
    def __organize_tweet_data(self):
        tweet_sequences = []

        # Organize tweets & train data into arrays
        for tweet_data in self.train_data:
            tweet_sequences.append(self.__tweet_clean(tweet_data[0]))

        return tweet_sequences
    
    def __init_tokenizer(self):
        data = self.__organize_tweet_data()
        self.tokenizer.fit_on_texts(data)
        
    # Tokenization Methods
    def tokenize_tweets(self, data):
        self.tokenizer.fit_on_texts(data)

        return self.tokenizer.texts_to_sequences(data)

class NERModel:
    def __init__(self, ner_model_path):
        try:
            self.nlp_ner = spacy.load(ner_model_path)
        except ModuleNotFoundError:
            raise
            
    def predict(self, text):
        # Check the classes have loaded back consistently
        # assert nlp.get_pipe("ner").move_names == move_names
        if self.nlp_ner.get_pipe('ner').move_names[0] != 'B-TRUCK_LOCATION':
            print('NER Pipe doesn\'t have Truck Location')
            return
        
        doc = self.nlp_ner(text)
        
        if doc.ents:
            return doc.ents
        else:
            return []
    
class AffirmationModel:
    def __init__(self, affirmation_model_path, scaler_data_path, train_data, max_len):
        self.max_len = max_len # Max Tweet Length
        
        try:
            self.scaler_data = self.__load_scaler_data(scaler_data_path, 'scaler_data.npy')
        except ModuleNotFoundError:
            raise
        
        # Sets up tokenizer
        try:
            # Setup Tokenizer
            self.DataTokenizer = DataTokenizer(train_data)
            self.tokenizer = self.DataTokenizer.tokenizer
        except ModuleNotFoundError:
            raise
            
        try:
            self.nlp_affirmation = keras.models.load_model(affirmation_model_path, compile=False)
        except ModuleNotFoundError:
            raise
            
    def predict(self,tweet):
        # Predict affirmation
        tokenized_tweets = self.DataTokenizer.tokenize_tweets([self.__tweet_lower(tweet)])
        predict_tweets = [self.__pad_array(data, self.max_len) for data in tokenized_tweets]
        scaled_test_data = self.__scale_test_data(predict_tweets)
        affirmation_prediction = self.nlp_affirmation.predict_classes(scaled_test_data)
        
        if affirmation_prediction[0] == 0:
            return True
        else:
            return False
    
    # Tweet lower
    def __tweet_lower(self, text):
        text = text.replace('&amp;', 'and')

        return text.lower()
        
    def __create_zeros_array(self, length):
        zeros_arr = []

        i = 0
        while i < length:
            zeros_arr.append(0)
            i += 1

        return zeros_arr

    def __pad_array(self,data, max_len):
        zeros_len = self.max_len - len(data)
        zeros_arr = self.__create_zeros_array(zeros_len)

        return [*data,*zeros_arr]
    
    def __load_scaler_data(self, directory, file):
        try:
            path = f'{directory}/{file}'
            data = load(path)
            
            return data
        except ModuleNotFoundError:
            raise
            
    def __scale_test_data(self, data):
        scaler_object = MinMaxScaler()
        scaler_object.fit(self.scaler_data)
        scale_data = scaler_object.transform(data)
        
        return scale_data

class AssertModel:
    def __init__(self, affirmationModel, nerModel):
        self.affirmationModel = affirmationModel
        self.nerModel = nerModel

    def predict(self, tweet):
        affirmation = self.affirmationModel.predict(tweet)
        named_entities = self.nerModel.predict(tweet)
 
        location_types = {
            'TRUCK_LOCATION': [],
            'FAC': [],
            'GPE': [],
            'LOC': [],
        }
        
        for ent in named_entities:
            if ent.label_ in location_types:
                location_types[ent.label_].append(ent.text)
        
        result = {
            'tweet': tweet,
            'affirmation': affirmation,
            'locations': location_types
        }
        
        return result

In [23]:
affirmationModel = AffirmationModel('./tweet_affirmation_model/test', './tweet_affirmation_model', train_data, 35)
nerModel = NERModel('./tweet_ner_model')
assertModel = AssertModel(affirmationModel, nerModel)

NameError: name 'load' is not defined

In [60]:
print(assertModel.predict("Today we are at 5th street"))

{'tweet': 'Today we are at 5th street', 'affirmation': True, 'locations': {'TRUCK_LOCATION': ['5th street'], 'FAC': [], 'GPE': [], 'LOC': []}}


In [None]:
# Next steps, move & improve this into a properly formatted structure for AWS Lambda