## Evaluation Notebook

In [2]:
!python -m spacy download en_core_web_sm



Collecting en-core-web-sm==3.1.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.1.0/en_core_web_sm-3.1.0-py3-none-any.whl (13.6 MB)
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.1.0
[+] Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')


In [None]:
pip install spacy

In [1]:
import pandas as pd
from tqdm import tqdm
import spacy
from spacy.tokens import DocBin
import re
import os
from spacy import displacy
from sklearn import metrics
from sklearn.metrics import plot_confusion_matrix

from spacy.util import filter_spans
 



FOODKEEPER_PATH = "datasets/FoodKeeper-Data.xls"
TRAINING_DATA_PATH = "datasets/data.csv"
MODEL_PATH = "output/model-last"
TEST_DATA_PATH = "datasets/test_data.csv"
REAL_TWITTER_DATA_PATH = "datasets/training1.6.csv"
#STARTING_KEYWORD_COUNT = 10
#TRAINING_LOOP_ITERATIONS = 3
#REQUIRED_KEYWORDS = 3

pd.options.mode.chained_assignment = None

#nlp = spacy.load(MODEL_PATH)
nlp = spacy.blank("en")
food_data = pd.read_excel(FOODKEEPER_PATH, sheet_name = "Product")
training_data = pd.read_csv(TRAINING_DATA_PATH,index_col = False, header = None)
test_data = pd.read_csv(TEST_DATA_PATH)
live_tweets = pd.read_csv(REAL_TWITTER_DATA_PATH, header = None)


#loop through and count the specific entities
keywords = [] #'chicken', 'milk', 'butter', 'cheese'
sampleData = []


    
#update rank tweet to take the counter as a parameter and condense both rankings
def rankTweet(tweet, model):
#     model = spacy.load(MODEL_PATH)
#     tweetKeywords = []
    doc = model(tweet)
    return len(doc.ents)
       
    
def findNewKeywords(tweet, keywords):
    foodkeeperKeys = foodKeeperInfo()
    x = tweet.split()
    word = ""
    i = 0
    while i < len(x):
    #for i in range(len(x)):
        z = 1
        if x[i] in foodkeeperKeys:
            word = x[i]
        try:
            foundBiWord = x[i] + " " + x[i+1]
            if foundBiWord in foodkeeperKeys: #keywords
                word = foundBiWord
                z = 2
        except:
            pass
        
        try:
            foundTriWord = x[i] + " " + x[i+1] + " " + x[i+2]
            if foundTriWord in foodkeeperKeys: #keywords:
                word = foundTriWord
                z = 3
        except:
            pass
        i += z
        
        if word not in keywords and word != "":
            keywords.append(word)
    return keywords
    

#Function to find the most common verbs in the tweets
def getCommonVerbs(data):
    import en_core_web_sm
    nlp2 = spacy.load("en_core_web_sm")
    count = 0
    myVerbs = {}
    for i in range(len(data[0])):
        doc = nlp2(data[0][i])
        for token in doc:
            if token.pos_ == "VERB":
                if token.text in myVerbs:
                    myVerbs[token.text] = myVerbs[token.text] + 1
                else:
                    if token.text not in nlp2.Defaults.stop_words:
                        myVerbs[token.text] = 1
        

    topVerbs = dict(sorted(myVerbs.items(), key = lambda item: item[1], reverse=True)[:10])
    return [key for key in topVerbs]
    

    
def convertToTrainingFormat(tweet, keywords):
    foodKeeperKeywordsTest = foodKeeperInfo()
    x = tweet.split()
    myEnts = {'entities':[]}
    found = False
    i = 0
    foundWords = []
    while i < len(x):
        z = 1
        newWord = ""
        if x[i] in keywords:
            pos = tweet.find(x[i])
            y = (pos, pos + len(x[i]), 'FOOD')
            found = True
        if x[i] in foodKeeperKeywordsTest:
            newWord = x[i]
            
        try:
            foundBiWord = x[i] + " " + x[i+1]
            if foundBiWord in keywords:
                pos = tweet.find(x[i])
                y = (pos, pos + len(x[i])+len(x[i+1]) + 1, 'FOOD')
                found = True
                z = 2
            if foundBiWord in foodKeeperKeywordsTest:
                newWord = foundBiWord
        except:
            pass
        
        try:
            foundTriWord = x[i] + " " + x[i+1] + " " + x[i+2]
            if foundTriWord in keywords:
                pos = tweet.find(x[i])
                y = (pos, pos + len(x[i])+len(x[i+1])+len(x[i+2]) + 2, 'FOOD')
                found = True
                z = 3
            if foundTriWord in foodKeeperKeywordsTest:
                newWord = foundTriWord
        except:
            pass
        
        try:
            if y not in myEnts['entities']:
                myEnts['entities'].append(y)    
        except:
            pass

        if newWord != "" and newWord not in keywordRanker:
            keywordRanker[newWord] = 1
        elif newWord != "" and newWord in keywordRanker:
            keywordRanker[newWord] += 1
        #print(z)
        i += z
        #print(i)
    formatted = (tweet, myEnts)
    #print(formatted)
    if found:
        return formatted
    else: 
        return ()
     
#Gathers all the keywords from the FoodKeeper database
def foodKeeperInfo():              
    keywords = []
    for word in food_data['Name']:
        word = word.replace(" or ", " ")
        word = re.sub('[/,]', ' ', word)
        word = word.lstrip()
        word = word.rstrip()

        if word.lower() not in keywords: 
            keywords.append(word.lower())

    #print("Total foodkeeper food names: " + str(len(keywords)))        
    #for element in sorted(keywords):
        #print(element)
        
    return keywords

foodKeeperKeywordsTest = foodKeeperInfo()


def preProcess(tweet):
    #Converts a tweet to lowercase, replaces anyusername w/ <USERNAME> and URLS with <URL>
    tweet = tweet.lower()
    tweet = re.sub('@[a-zA-z0-9]*', '', tweet)              # <USERNAME>
    tweet = re.sub('http[a-zA-z0-9./:]*', '', tweet)       # <URL>
    tweet = re.sub('[.,-]*', '', tweet)
    tweet = re.sub('&amp;', 'and', tweet)
    
    return tweet


noEntity= []

keywordRanker = {}

def trainModel(data):
    #Initialize all the variables
    keywords = [] #foodKeeperInfo()
    oldKeywords = []
    newKeywords = []
    
      
    
    #commonVerbs = getCommonVerbs(data) 
    print(commonVerbs)
    print("Common Verbs gathered...", '\n')
    
    #entityCheckCount controls how many entities are required to 
    #add a Tweet to be trained
    
    entityCheckCount = 3
    
    counter = 0
    trainingLoop = True
    
    while trainingLoop:
        counterText = "~~~~~~~~~~~~~~~~~"+str(counter)+"~~~~~~~~~~~~~~~~~"
        print(counterText)
        
        nlp = spacy.blank("en") # load a new spacy model
        try:
            model = spacy.load(MODEL_PATH)
            print('Model loaded...')
        except:
            print('No model...')
        
        db = DocBin() # create a DocBin object
        
        myTweets = []
        
        #Loop through all the tweets
        #This loop is necessary to get the most common keywords 
        #in the convertToTrainingFormat function
        
        for i in range(len(data[0])): #len(data[0])
            #useless if?
#             if i % 500 == 0:
#                 print(i)

            if counter == 0:
                x = convertToTrainingFormat(preProcess(data[0][i]), keywords)
                
            #If counter is 1 then there is no model to check so 
            #a word count is performed
            elif counter == 1:
                x = convertToTrainingFormat(preProcess(data[0][i]), keywords)
                if x!= ():
                    if len(x[1]['entities']) > entityCheckCount:
                        #print("Found tweet", x[0])
                        myTweets.append(x)  
            else:
                #Convert each tweet into spacy training format
                x = convertToTrainingFormat(preProcess(data[0][i]), keywords)
                checkPassed = False
                if x != ():
                    #Check the ranking of the tweet
                    if rankTweet(x[0], model) > entityCheckCount:
                        checkTweet = x[0].split()
                        
                        #Check to see if tweet has one of the common verbs
                        for word in checkTweet:
                            if word in commonVerbs:
                                checkPassed = True
                                
                        if True: #checkPassed:
                            #print("Checking rank...")
                            myTweets.append(x)
        
                        
#         Initialize the keywords
       
        
        if counter == 0:  
            # Set keywords to be all keywords found in foodkeeper
#            keywords = foodKeeperInfo()
            sortedKeywords =  sorted(keywordRanker, key=keywordRanker.get, reverse=True)

            for i in range(15): #sortedKeywords
                keywords.append(sortedKeywords[i])
            #print(sortedKeywords[i], keywordRanker[sortedKeywords[i]])
            

        elif counter > 0:
            for text, annot in tqdm(myTweets): # data in previous format
                doc = nlp.make_doc(text) # create doc object from text
                ents = []
                for start, end, label in annot["entities"]: # add character indexes
                    span = doc.char_span(start, end, label=label, alignment_mode="contract")
                    
                    if span is None:
                        print("Skipping entity")
                    elif ents == []: ents.append(span) 
                    else:
                        #Check to see if any entities are overlapping i.e rice and rice cakes
                        ents.append(span)
                        
                        
#                         for ent in ents:
#                             if ent is not None:
#                                 entLength = ent.end - ent.start
#                                 if span.start == ent.start or span.end == ent.end:
#                                     if entLength > (span.end - span.start):
#                                         continue
#                                     else:
#                                         ents.remove(ent)
#                                         ents.append(span)
#                                 else:
#                                     ents.append(span)
                    #if span not in ents: ents.append(span)
                            
                                #print(span.start, span.end, ents)
                    #put into for loop
#                 foundEnts = []
#                 newEnts = []
#                 for ent in ents:
#                     if ent.text not in foundEnts:
#                         foundEnts.append(ent.text)
#                         newEnts.append(ent)
                        
                newEnts = filter_spans(ents)              
                #try:
                doc.ents = newEnts # label the text with the ents
                    #print(doc)
                db.add(doc)
                #except:
                    #print("Error 10: ", doc)

            db.to_disk("./train.spacy") # save the docbin object
                
            #If problems are occuring with the models not appearing
            #ensure that the command is valid, specifically python is the correct
            #PATH variable name on your machine
            #--paths.train should be where the docbin object is saved
            stream = os.popen('python -m spacy train config.cfg --output ./output --paths.train ./train.spacy --paths.dev ./train.spacy')
            print(stream.read())
            print("Total keywords: ", str(len(keywords)))
            print("List of Keywords:\n\n",keywords,"\n\n")
            oldKeywords = len(keywords)
            
            
            #Look for new keywords
            for element in myTweets:
                keywords = findNewKeywords(element[0], keywords)
                
            
            #No new keywords are found
            if (oldKeywords == len(keywords)) and counter > 1 and entityCheckCount != 1:
                entityCheckCount -= 1
                print("Decreasing entityCheckCount variable by 1")
                print("entityCheckCount = ", entityCheckCount)
                

            #New keywords are found and entity rank check == 1
            elif (oldKeywords == len(keywords)) and counter > 1 and entityCheckCount == 1:
                trainingLoop = False
            
            eval_model()
            
        

        #for element in myTweets:
            #findNewKeywords(element[0], keywords)

        print("Total keywords: ", str(len(keywords)))
        print("Total Tweets: ", str(len(myTweets)))
        print("List of Keywords:\n\n",keywords,"\n\n")
        counter += 1
        
    print('Training Done...')
        
        
        

def information(data):
    myData = {}
    totalEnt=0
    
    for i in range(len(data[0])):
        doc = nlp(preProcess(data[0][i]))
        if len(doc.ents) == 0:
            noEntity.append(preProcess(data[0][i]))
            
        #print(len(doc.ents))
        if(len(doc.ents) == 4):
            print(doc)
            
        for entity in doc.ents: 
        #print(entity.label_)
            totalEnt+=1
            if(entity.label_ == 'FOOD'):
                if entity.text in myData:
                    myData[entity.text] += 1
                else:
                    myData[entity.text] = 1
                    
    print("Number of entities found: " + str(len(myData)))
    print(totalEnt)
    for i in sorted(myData, key = myData.get):
        print("Entity: " + i, "Count: " + str(myData[i]), "Density: " + str(format(myData[i]/totalEnt, '.2f')), end = "\n")
    
    
    return myData




for i in range(len(test_data['tweet'])):
    test_data['tweet'][i] = preProcess(test_data['tweet'][i])



y = test_data['food'].tolist()
print(nlp.pipe_names)
    
def ent_recognize(text):
    doc = nlp(text)
    displacy.render(doc,style = "ent")
    
def predict(tweet):
    doc = nlp(str(tweet))
    if doc.ents:
        displacy.render(doc,style = "ent")

def returnPrediction(tweet):
    nlp = spacy.load(MODEL_PATH)
    doc = nlp(str(tweet))
    if doc.ents:
        return 1
    else:
        return 0
    
def get_predictions():
    predictions = []
    for tweet in test_data['tweet'].tolist():
        predictions.append(returnPrediction(tweet))
    return predictions
    
def eval_model():
    nlp = spacy.load(MODEL_PATH)
    predictions = get_predictions()
    print(metrics.confusion_matrix(y,predictions, labels = [1,0]))
    print(metrics.classification_report(y,predictions, labels = [1,0]))
    
def show_tp():
    counter = 0
    tweets = test_data['tweet'].tolist()
    predictions = get_predictions()
    for i in range(len(y)):
        if predictions[i] == 1 and y[i] == 1:
            print("True positives:", tweets[i], "\n")
            counter += 1
    print(counter)
    
def show_tn():
    counter = 0
    predictions = get_predictions()
    tweets = test_data['tweet'].tolist()
    for i in range(len(y)):
        if predictions[i] == 0 and y[i] == 0:
            print("True Negative:", tweets[i], "\n")
            counter += 1
    print(counter)
    
def show_fn():
    predictions = get_predictions()
    tweets = test_data['tweet']
    counter = 0
    for i in range(len(y)):
        if predictions[i] == 0 and y[i] == 1:
            print("False Negative:", tweets[i], "\n")
            counter += 1
    print(counter)
    
def show_fp():
    predictions = get_predictions()
    tweets = test_data['tweet'].tolist()
    for i in range(len(y)):
        if predictions[i] == 1 and y[i] == 0:
            print("False Positive:")
            doc = nlp(str(tweets[i]))
            if doc.ents:
                displacy.render(doc,style = "ent")

[]


# Checking for overlapping words

In [4]:
rankTweet("chicken", None)
# def checkForOverlaps(data):
    
#     print(data)
# keywords = foodKeeperInfo()
# testdata = convertToTrainingFormat("My rice cakes is tasty ", keywords)

# # for word in keywords:
# #     for word2 in wordsInFoodkeeper:
# #         if word in word2 and word != word2:
# #             print(word,word2)



0

## Use the function below to check individual sentences

In [8]:
nlp = spacy.load(MODEL_PATH)
ent_recognize("my friend is chicken because he is scared")

testTweets = live_tweets[5]
for tweet in testTweets[:500]:
    if nlp(preProcess(tweet)).ents:
        ent_recognize(preProcess(tweet))


## Use the function below to check model performance on the entire test set

In [None]:
#eval_model()

## Use the functions below to see TP, TN, FP, FN respectively

In [2]:
# show_tp()
# show_tn()
# show_fp()
#show_fn()

In [23]:
# foodkeeper = foodKeeperInfo()
# print(foodkeeper)
# sortedKeywords =  sorted(keywordRanker, key=keywordRanker.get, reverse=True)

# for i in range(15): #sortedKeywords
#     keywords.append(sortedKeywords[i])
# print(keywords)
# keywords.append("chicken")
# #keywords.append("cream cheese")

# abc = convertToTrainingFormat("I like to eat cream and cheese with chicken test", keywords)
# print(abc)


# Find the most common words

In [2]:
commonVerbs = getCommonVerbs(training_data) 
#commonVerbs = ['eat', 'know', 'think', 'want', 'got', 'like', 'love', 'use', 'need', 'add']

# Training Loop

In [3]:
trainModel(training_data)
#print(keywords)  


['eat', 'know', 'think', 'want', 'got', 'like', 'love', 'use', 'need', 'add']
Common Verbs gathered... 

~~~~~~~~~~~~~~~~~0~~~~~~~~~~~~~~~~~
Model loaded...
Total keywords:  15
Total Tweets:  0
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef'] 


~~~~~~~~~~~~~~~~~1~~~~~~~~~~~~~~~~~
Model loaded...


100%|███████████████████████████████████████████████████████████████████████████████████████████| 194/194 [00:00<00:00, 1283.66it/s]


Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     40.50    0.00    0.00    0.00    0.00
  2     200         54.36   1148.80   97.98   96.86   99.13    0.98
  4     400        136.43    159.72   99.32   98.78   99.88    0.99
  8     600        154.84    106.89   99.81   99.75   99.88    1.00
 12     800         81.31     37.85  100.00  100.00  100.00    1.00
 17    1000         39.90     10.64  100.00  100.00  100.00    1.00
 24    1200         65.72     17.81  100.00  100.00  100.00    1.00
 32    1400        109.73     23.85  100.00  100.00  100.00    1.00
 42    1600        123.47     24.43  100.00  100.00  100.00    1.00
 55    1800        136.49     18.88   99.94   99.88  

 85%|█████████████████████████████████████████████████████████████████████████████▏             | 434/512 [00:00<00:00, 1485.15it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|███████████████████████████████████████████████████████████████████████████████████████████| 512/512 [00:00<00:00, 1451.24it/s]


Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     40.67    0.00    0.00    0.00    0.00
  0     200         77.58   2734.03   84.27   79.72   89.38    0.84
  1     400        105.43   1399.69   92.27   89.58   95.12    0.92
  3     600        165.66   1085.43   94.66   91.67   97.86    0.95
  4     800        179.58    801.95   95.67   92.47   99.10    0.96
  6    1000        246.21    558.21   98.84   98.22   99.47    0.99
  9    1200        298.11    450.98   99.14   98.48   99.81    0.99
 12    1400        310.65    383.93   99.89   99.85   99.92    1.00
 16    1600        415.85    318.60   99.89   99.89   99.89    1.00
 21    1800        550.45    237.89   99.91   99.85  

Model loaded...


 21%|██████████████████▊                                                                       | 418/2004 [00:00<00:01, 1413.76it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 37%|████████████████████████████████▉                                                         | 734/2004 [00:00<00:00, 1511.49it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 62%|███████████████████████████████████████████████████████▏                                 | 1242/2004 [00:00<00:00, 1647.57it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 88%|██████████████████████████████████████████████████████████████████████████████           | 1759/2004 [00:01<00:00, 1694.78it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 2004/2004 [00:01<00:00, 1611.49it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     31.50    0.06    2.86    0.03    0.00
  0     200         95.78   2700.34   81.45   74.61   89.67    0.81
  0     400         75.00   1258.96   88.90   82.63   96.20    0.89
  0     600         84.04   1333.61   91.46   86.88   96.54    0.91
  1     800         97.48   1418.21   93.22   90.68   95.89    0.93
  1    1000        241.27   1463.35   94.29   91.70   97.03    0.94
  2    1200        142.78   1344.49   96.62   95.39   97.88    0.97
  3    1400        180.29   1262.98   97.47   96.15   98.82    0.97
  4    1600        261.42

Model loaded...


  9%|████████▌                                                                                 | 277/2923 [00:00<00:01, 1399.44it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 25%|██████████████████████▉                                                                   | 745/2923 [00:00<00:01, 1539.97it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 44%|██████████████████████████████████████▊                                                  | 1276/2923 [00:00<00:00, 1702.37it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 62%|███████████████████████████████████████████████████████                                  | 1809/2923 [00:01<00:00, 1752.79it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 87%|█████████████████████████████████████████████████████████████████████████████            | 2530/2923 [00:01<00:00, 1783.21it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 2923/2923 [00:01<00:00, 1697.47it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity





[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     38.83    0.00    0.00    0.00    0.00
  0     200        101.58   2674.06   83.01   81.21   84.89    0.83
  0     400         88.64   1482.91   85.89   85.79   85.98    0.86
  0     600        101.59   1468.01   90.26   85.51   95.57    0.90
  0     800        103.72   1728.55   91.52   86.75   96.85    0.92
  1    1000        114.53   1640.66   93.34   89.25   97.81    0.93
  1    1200        159.35   1808.17   94.18   90.56   98.10    0.94
  2    1400        199.41   1751.57   95.58   94.50   96.68    0.96
  3    1600        235.71   1919.52   96.54   94.22   98.98    0.97
  4    1800        324.77   1745.34   97.89   97.07   98.73    0.98
  5    2000        430.17   1637.

Model loaded...


 13%|███████████▍                                                                              | 425/3344 [00:00<00:02, 1436.67it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 28%|████████████████████████▊                                                                 | 922/3344 [00:00<00:01, 1611.65it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 43%|██████████████████████████████████████▌                                                  | 1448/3344 [00:00<00:01, 1705.27it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 59%|████████████████████████████████████████████████████▉                                    | 1987/3344 [00:01<00:00, 1769.24it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 76%|███████████████████████████████████████████████████████████████████▌                     | 2537/3344 [00:01<00:00, 1811.05it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 92%|██████████████████████████████████████████████████████████████████████████████████▏      | 3087/3344 [00:01<00:00, 1819.42it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3344/3344 [00:01<00:00, 1720.02it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     38.33    0.00    0.00    0.00    0.00
  0     200        110.68   2743.56   81.51   79.88   83.21    0.82
  0     400         79.19   1395.62   87.96   83.58   92.83    0.88
  0     600         82.18   1606.36   89.26   87.44   91.16    0.89
  0     800         81.57   1576.54   91.59   87.69   95.84    0.92
  1    1000         97.37   1491.33   93.10   89.18   97.38    0.93
  1    1200        135.58   1795.41   93.90   92.94   94.87    0.94
  2    1400        153.75   1970.09   95.26   93.36   97.24    0.95
  2    16

Model loaded...


 12%|██████████▉                                                                               | 424/3488 [00:00<00:02, 1431.69it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 26%|███████████████████████▋                                                                  | 916/3488 [00:00<00:01, 1592.07it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 42%|█████████████████████████████████████                                                    | 1452/3488 [00:00<00:01, 1724.34it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 57%|██████████████████████████████████████████████████▊                                      | 1992/3488 [00:01<00:00, 1775.90it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 68%|████████████████████████████████████████████████████████████▏                            | 2361/3488 [00:01<00:00, 1809.11it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 89%|██████████████████████████████████████████████████████████████████████████████▉          | 3096/3488 [00:01<00:00, 1821.94it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3488/3488 [00:02<00:00, 1728.67it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity





[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     27.00    0.00    0.00    0.00    0.00
  0     200         81.25   2735.76   80.82   78.13   83.71    0.81
  0     400         68.47   1412.99   88.20   83.42   93.57    0.88
  0     600         84.95   1466.95   90.30   85.72   95.40    0.90
  0     800         80.41   1667.62   90.58   88.37   92.90    0.91
  1    1000         95.85   1797.97   92.34   88.42   96.62    0.92
  1    1200        119.88   1815.78   93.87   91.06   96.85    0.94
  2    1400        135.40   2053.10   95.00   92.16   98.03    0.95
  2    1600        206.65   2021.02   96.37   95.11   97.66    0.96
  3    1800        274.96   2070.77   97.21   96.00   98.45    0.97
  4    2000        355.76   2015.

[[31 13]
 [ 7 32]]
              precision    recall  f1-score   support

           1       0.82      0.70      0.76        44
           0       0.71      0.82      0.76        39

    accuracy                           0.76        83
   macro avg       0.76      0.76      0.76        83
weighted avg       0.77      0.76      0.76        83

Total keywords:  361
Total Tweets:  3488
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'mustard', 'corned beef', 'rye', 'fried chicken', 'gravy', 'tuna', 've

 12%|██████████▊                                                                               | 422/3509 [00:00<00:02, 1421.11it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 21%|███████████████████▏                                                                      | 747/3509 [00:00<00:01, 1548.04it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 41%|████████████████████████████████████▋                                                    | 1445/3509 [00:00<00:01, 1700.28it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 57%|██████████████████████████████████████████████████▍                                      | 1989/3509 [00:01<00:00, 1772.94it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 72%|████████████████████████████████████████████████████████████████▎                        | 2538/3509 [00:01<00:00, 1810.45it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 88%|██████████████████████████████████████████████████████████████████████████████▍          | 3091/3509 [00:01<00:00, 1825.58it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3509/3509 [00:02<00:00, 1724.12it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity





[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     28.67    0.00    0.00    0.00    0.00
  0     200         97.46   2808.28   81.54   78.24   85.14    0.82
  0     400         85.06   1515.82   87.57   84.31   91.09    0.88
  0     600        102.28   1465.25   89.10   85.67   92.81    0.89
  0     800         84.54   1628.01   91.69   87.74   96.01    0.92
  1    1000         93.49   1634.11   92.16   88.42   96.23    0.92
  1    1200        113.37   1779.25   93.99   91.47   96.65    0.94
  2    1400        145.19   2044.28   94.88   92.23   97.68    0.95
  2    1600        229.65   1984.22   96.36   94.84   97.94    0.96
  3    1800        284.38   2038.56   97.19   95.70   98.73    0.97
  4    2000        364.08   1935.

[[32 12]
 [ 7 32]]
              precision    recall  f1-score   support

           1       0.82      0.73      0.77        44
           0       0.73      0.82      0.77        39

    accuracy                           0.77        83
   macro avg       0.77      0.77      0.77        83
weighted avg       0.78      0.77      0.77        83

Total keywords:  362
Total Tweets:  3509
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'mustard', 'corned beef', 'rye', 'fried chicken', 'gravy', 'tuna', 've

 12%|██████████▋                                                                               | 417/3513 [00:00<00:02, 1403.20it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 21%|██████████████████▊                                                                       | 735/3513 [00:00<00:01, 1519.97it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 40%|███████████████████████████████████▊                                                     | 1415/3513 [00:00<00:01, 1668.19it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 55%|█████████████████████████████████████████████████▏                                       | 1941/3513 [00:01<00:00, 1726.55it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 66%|██████████████████████████████████████████████████████████▎                              | 2303/3513 [00:01<00:00, 1767.70it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 86%|████████████████████████████████████████████████████████████████████████████▉            | 3036/3513 [00:01<00:00, 1798.04it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3513/3513 [00:02<00:00, 1694.01it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity





[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     31.33    0.23    0.78    0.13    0.00
  0     200         83.63   2648.01   80.03   76.36   84.08    0.80
  0     400         93.86   1419.53   85.76   80.21   92.13    0.86
  0     600         94.05   1516.47   89.96   86.12   94.17    0.90
  0     800         86.14   1691.96   91.37   86.30   97.08    0.91
  1    1000        105.08   1575.57   92.89   89.49   96.57    0.93
  1    1200        132.94   1825.23   94.07   92.60   95.59    0.94
  2    1400        149.54   2024.45   94.78   91.78   97.97    0.95
  2    1600        203.88   1985.45   96.28   95.47   97.10    0.96
  3    1800        268.98   2057.62   97.17   96.88   97.46    0.97
  4    2000        323.96   2004.

Decreasing entityCheckCount variable by 1
entityCheckCount =  2
[[34 10]
 [ 4 35]]
              precision    recall  f1-score   support

           1       0.89      0.77      0.83        44
           0       0.78      0.90      0.83        39

    accuracy                           0.83        83
   macro avg       0.84      0.84      0.83        83
weighted avg       0.84      0.83      0.83        83

Total keywords:  362
Total Tweets:  3513
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'musta

  7%|██████▎                                                                                   | 444/6286 [00:00<00:03, 1497.41it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 12%|██████████▉                                                                               | 762/6286 [00:00<00:03, 1553.50it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 17%|███████████████▍                                                                         | 1094/6286 [00:00<00:03, 1615.77it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 26%|██████████████████████▊                                                                  | 1614/6286 [00:01<00:02, 1695.59it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 31%|████████████████████████████                                                             | 1979/6286 [00:01<00:02, 1757.61it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 43%|██████████████████████████████████████▎                                                  | 2706/6286 [00:01<00:02, 1774.37it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 52%|██████████████████████████████████████████████▎                                          | 3273/6286 [00:01<00:01, 1848.77it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 61%|██████████████████████████████████████████████████████▎                                  | 3836/6286 [00:02<00:01, 1868.10it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 67%|███████████████████████████████████████████████████████████▋                             | 4217/6286 [00:02<00:01, 1887.23it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 79%|██████████████████████████████████████████████████████████████████████▌                  | 4981/6286 [00:02<00:00, 1878.67it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 88%|██████████████████████████████████████████████████████████████████████████████▌          | 5550/6286 [00:03<00:00, 1888.86it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 94%|███████████████████████████████████████████████████████████████████████████████████▉     | 5928/6286 [00:03<00:00, 1886.64it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

100%|█████████████████████████████████████████████████████████████████████████████████████████| 6286/6286 [00:03<00:00, 1793.18it/s]



Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     17.67    0.77    0.75    0.79    0.01
  0     200         90.11   2571.37   75.26   78.14   72.58    0.75
  0     400         79.92   1449.29   85.90   80.55   92.00    0.86
  0     600         74.54   1337.83   88.23   83.58   93.43    0.88
  0     800         90.84   1561.81   89.13   83.79   95.20    0.89
  0    1000         97.38   1832.50   91.00   87.39   94.91    0.91
  0    1200        103.82   1962.63   91.71   86.56   97.51    0.92
  1    1400        132.77   2009.11   92.19   89.79   94.73    0.92
  1    1600        157.51   2434.89   93

[[34 10]
 [ 6 33]]
              precision    recall  f1-score   support

           1       0.85      0.77      0.81        44
           0       0.77      0.85      0.80        39

    accuracy                           0.81        83
   macro avg       0.81      0.81      0.81        83
weighted avg       0.81      0.81      0.81        83

Total keywords:  370
Total Tweets:  6286
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'mustard', 'corned beef', 'rye', 'fried chicken', 'gravy', 'tuna', 've

  5%|████▎                                                                                     | 289/5956 [00:00<00:03, 1458.75it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 16%|██████████████▏                                                                           | 940/5956 [00:00<00:03, 1612.31it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 25%|█████████████████████▉                                                                   | 1466/5956 [00:00<00:02, 1714.76it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 34%|██████████████████████████████                                                           | 2014/5956 [00:01<00:02, 1788.82it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity


 40%|███████████████████████████████████▌                                                     | 2381/5956 [00:01<00:01, 1798.59it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 53%|██████████████████████████████████████████████▉                                          | 3138/5956 [00:01<00:01, 1879.05it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 62%|███████████████████████████████████████████████████████▍                                 | 3706/5956 [00:02<00:01, 1879.56it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 69%|█████████████████████████████████████████████████████████████                            | 4090/5956 [00:02<00:00, 1901.10it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 78%|█████████████████████████████████████████████████████████████████████▌                   | 4658/5956 [00:02<00:00, 1861.36it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 88%|██████████████████████████████████████████████████████████████████████████████▏          | 5229/5956 [00:02<00:00, 1882.44it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 97%|██████████████████████████████████████████████████████████████████████████████████████▌  | 5795/5956 [00:03<00:00, 1875.24it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 5956/5956 [00:03<00:00, 1802.21it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     37.17    0.15    1.00    0.08    0.00
  0     200         86.57   2654.16   81.23   80.01   82.48    0.81
  0     400         73.82   1344.28   87.75   84.59   91.16    0.88
  0     600         65.42   1321.66   88.79   83.69   94.56    0.89
  0     800         90.29   1431.64   91.16   87.22   95.48    0.91
  0    1000        124.08   1700.62   91.34   86.96   96.19    0.91
  0    1200        407.63   1910.31   92.56   88.30   97.24    0.93
  1    1400        308.91   1987.98   93.60   90.00   97.50    0.94
  1    1600        140.61   2335.63   94.64   91.57   97.92    0.95
  2  

[[37  7]
 [ 5 34]]
              precision    recall  f1-score   support

           1       0.88      0.84      0.86        44
           0       0.83      0.87      0.85        39

    accuracy                           0.86        83
   macro avg       0.86      0.86      0.86        83
weighted avg       0.86      0.86      0.86        83

Total keywords:  373
Total Tweets:  5956
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'mustard', 'corned beef', 'rye', 'fried chicken', 'gravy', 'tuna', 've

  4%|███▉                                                                                      | 286/6580 [00:00<00:04, 1442.88it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 14%|████████████▍                                                                             | 913/6580 [00:00<00:03, 1560.30it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 21%|███████████████████                                                                      | 1411/6580 [00:00<00:03, 1629.54it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 29%|██████████████████████████                                                               | 1931/6580 [00:01<00:02, 1715.84it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 35%|███████████████████████████████                                                          | 2297/6580 [00:01<00:02, 1773.65it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 46%|████████████████████████████████████████▉                                                | 3028/6580 [00:01<00:01, 1811.24it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 52%|█████████████████████████████████████████████▉                                           | 3396/6580 [00:02<00:01, 1824.96it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 60%|█████████████████████████████████████████████████████▍                                   | 3949/6580 [00:02<00:01, 1813.73it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 69%|█████████████████████████████████████████████████████████████                            | 4512/6580 [00:02<00:01, 1857.16it/s]


Skipping entity
Skipping entity


 74%|██████████████████████████████████████████████████████████████████                       | 4884/6580 [00:02<00:00, 1832.06it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 82%|█████████████████████████████████████████████████████████████████████████▍               | 5427/6580 [00:03<00:00, 1779.87it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 91%|████████████████████████████████████████████████████████████████████████████████▊        | 5979/6580 [00:03<00:00, 1814.97it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 96%|█████████████████████████████████████████████████████████████████████████████████████▊   | 6345/6580 [00:03<00:00, 1821.81it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 6580/6580 [00:03<00:00, 1750.13it/s]


Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     41.50    0.00    0.00    0.00    0.00
  0     200         76.50   2455.72   79.16   76.04   82.55    0.79
  0     400         78.09   1414.87   85.38   79.18   92.63    0.85
  0     600         73.82   1344.33   88.09   85.17   91.23    0.88
  0     800         92.90   1538.68   89.77   84.99   95.12    0.90
  0    1000         91.11   1620.63   90.58   85.23   96.65    0.91
  0    1200         98.04   1974.11   91.89   88.24   95.87    0.92
  1    1400        125.08   2158.46   92.77   90.28   95.40    0.93
  1    1600        134.51   2367.97   93.48   90.35   96.82    0.93
  1    1800        171.84   2695.21   94.37   92.03  

[[36  8]
 [ 6 33]]
              precision    recall  f1-score   support

           1       0.86      0.82      0.84        44
           0       0.80      0.85      0.83        39

    accuracy                           0.83        83
   macro avg       0.83      0.83      0.83        83
weighted avg       0.83      0.83      0.83        83

Total keywords:  375
Total Tweets:  6580
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'mustard', 'corned beef', 'rye', 'fried chicken', 'gravy', 'tuna', 've

  4%|███▊                                                                                      | 270/6289 [00:00<00:04, 1364.71it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 11%|██████████▎                                                                               | 720/6289 [00:00<00:03, 1470.95it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 19%|████████████████▊                                                                        | 1192/6289 [00:00<00:03, 1548.99it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 32%|████████████████████████████▋                                                            | 2030/6289 [00:01<00:02, 1666.88it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 38%|█████████████████████████████████▋                                                       | 2379/6289 [00:01<00:02, 1709.44it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 43%|██████████████████████████████████████▌                                                  | 2724/6289 [00:01<00:02, 1707.21it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 52%|██████████████████████████████████████████████▏                                          | 3260/6289 [00:02<00:01, 1755.05it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 61%|█████████████████████████████████████████████████████▊                                   | 3806/6289 [00:02<00:01, 1797.40it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 66%|███████████████████████████████████████████████████████████                              | 4173/6289 [00:02<00:01, 1813.19it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 72%|████████████████████████████████████████████████████████████████▏                        | 4537/6289 [00:02<00:00, 1802.37it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 78%|█████████████████████████████████████████████████████████████████████▎                   | 4897/6289 [00:02<00:00, 1776.68it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 86%|████████████████████████████████████████████████████████████████████████████▉            | 5439/6289 [00:03<00:00, 1784.43it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 95%|████████████████████████████████████████████████████████████████████████████████████▌    | 5974/6289 [00:03<00:00, 1766.32it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|█████████████████████████████████████████████████████████████████████████████████████████| 6289/6289 [00:03<00:00, 1702.18it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     33.00    0.67    1.73    0.41    0.01
  0     200         79.56   2551.88   76.85   79.08   74.74    0.77
  0     400         68.77   1357.91   84.85   80.70   89.46    0.85
  0     600         62.17   1357.26   89.17   85.95   92.65    0.89
  0     800         74.72   1462.79   89.61   84.09   95.91    0.90
  0    1000         86.86   1620.86   91.67   87.72   95.99    0.92
  0    1200         97.23   1967.74   92.84   88.37   97.78    0.93
  1    1400        120.71   1857.27   93.01   88.52   97.97    0.93
  1    1600        169.74   2196.41   94.

Decreasing entityCheckCount variable by 1
entityCheckCount =  1
[[37  7]
 [ 4 35]]
              precision    recall  f1-score   support

           1       0.90      0.84      0.87        44
           0       0.83      0.90      0.86        39

    accuracy                           0.87        83
   macro avg       0.87      0.87      0.87        83
weighted avg       0.87      0.87      0.87        83

Total keywords:  375
Total Tweets:  6289
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'musta

  4%|███▍                                                                                     | 491/12783 [00:00<00:07, 1649.60it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


  8%|███████                                                                                 | 1019/12783 [00:00<00:06, 1714.67it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 12%|██████████▌                                                                             | 1540/12783 [00:00<00:06, 1681.33it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 15%|█████████████                                                                           | 1891/12783 [00:01<00:06, 1719.65it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 21%|██████████████████                                                                      | 2632/12783 [00:01<00:05, 1819.12it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 23%|████████████████████▋                                                                   | 3000/12783 [00:01<00:05, 1829.87it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 28%|████████████████████████▌                                                               | 3560/12783 [00:02<00:04, 1847.16it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 34%|█████████████████████████████▊                                                          | 4331/12783 [00:02<00:04, 1911.35it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 38%|█████████████████████████████████▊                                                      | 4910/12783 [00:02<00:04, 1918.28it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 43%|█████████████████████████████████████▊                                                  | 5501/12783 [00:03<00:03, 1932.32it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 48%|█████████████████████████████████████████▉                                              | 6088/12783 [00:03<00:03, 1936.03it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 52%|██████████████████████████████████████████████                                          | 6690/12783 [00:03<00:03, 1985.83it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 55%|████████████████████████████████████████████████▊                                       | 7091/12783 [00:03<00:02, 1983.58it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 60%|████████████████████████████████████████████████████▉                                   | 7692/12783 [00:04<00:02, 1984.86it/s]


Skipping entity
Skipping entity


 64%|███████████████████████████████████████████████████████▉                                | 8119/12783 [00:04<00:02, 2032.23it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 68%|████████████████████████████████████████████████████████████                            | 8730/12783 [00:04<00:02, 2025.45it/s]

Skipping entity
Skipping entity
Skipping entity

 76%|███████████████████████████████████████████████████████████████████                     | 9737/12783 [00:05<00:01, 1973.32it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 81%|██████████████████████████████████████████████████████████████████████▎                | 10335/12783 [00:05<00:01, 1984.35it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 86%|██████████████████████████████████████████████████████████████████████████▍            | 10930/12783 [00:05<00:00, 1963.11it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 89%|█████████████████████████████████████████████████████████████████████████████▏         | 11333/12783 [00:05<00:00, 1989.88it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 93%|█████████████████████████████████████████████████████████████████████████████████▏     | 11933/12783 [00:06<00:00, 1979.55it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

 98%|█████████████████████████████████████████████████████████████████████████████████████▎ | 12528/12783 [00:06<00:00, 1968.15it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity

100%|███████████████████████████████████████████████████████████████████████████████████████| 12783/12783 [00:06<00:00, 1907.98it/s]



[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     30.33    0.00    0.00    0.00    0.00
  0     200         82.57   2399.29   72.75   81.02   66.02    0.73
  0     400         70.26   1252.15   84.67   80.48   89.31    0.85
  0     600         67.85   1249.39   87.50   82.34   93.36    0.88
  0     800         77.50   1357.19   88.66   83.07   95.05    0.89
  0    1000         78.73   1593.03   90.27   85.48   95.63    0.90
  0    1200         87.31   1657.41   91.23   87.40   95.41    0.91
  0    1400        106.24   1984.89   92.21   87.83   97.05    0.92
  0    1600        120.91   2306.28   92.28   87.68   97.39    0.92
  1    1800        132.43   2571.55   92.95   88.57   97.78    0.93
  1    2000        191.49   2965

[[37  7]
 [ 3 36]]
              precision    recall  f1-score   support

           1       0.93      0.84      0.88        44
           0       0.84      0.92      0.88        39

    accuracy                           0.88        83
   macro avg       0.88      0.88      0.88        83
weighted avg       0.88      0.88      0.88        83

Total keywords:  385
Total Tweets:  12783
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'mustard', 'corned beef', 'rye', 'fried chicken', 'gravy', 'tuna', 'v

  4%|███▌                                                                                     | 500/12355 [00:00<00:06, 1697.67it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


  8%|███████▎                                                                                | 1032/12355 [00:00<00:06, 1768.11it/s]

Skipping entity
Skipping entity


 13%|███████████▏                                                                            | 1572/12355 [00:00<00:06, 1775.22it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity


 17%|███████████████▏                                                                        | 2132/12355 [00:01<00:05, 1845.22it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 20%|█████████████████▉                                                                      | 2512/12355 [00:01<00:05, 1869.07it/s]

Skipping entity
Skipping entity


 27%|███████████████████████▎                                                                | 3275/12355 [00:01<00:04, 1905.12it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 31%|███████████████████████████▍                                                            | 3857/12355 [00:02<00:04, 1903.03it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 36%|███████████████████████████████▋                                                        | 4451/12355 [00:02<00:04, 1940.25it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 41%|████████████████████████████████████                                                    | 5059/12355 [00:02<00:03, 1992.75it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 44%|██████████████████████████████████████▉                                                 | 5458/12355 [00:02<00:03, 1978.35it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 51%|████████████████████████████████████████████▋                                           | 6267/12355 [00:03<00:03, 2003.31it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 56%|████████████████████████████████████████████████▉                                       | 6879/12355 [00:03<00:02, 2028.00it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 59%|███████████████████████████████████████████████████▉                                    | 7285/12355 [00:03<00:02, 2014.85it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 66%|█████████████████████████████████████████████████████████▉                              | 8133/12355 [00:04<00:02, 2065.28it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 71%|██████████████████████████████████████████████████████████████▍                         | 8758/12355 [00:04<00:01, 2058.01it/s]

Skipping entity
Skipping entity
Skipping entity

 76%|██████████████████████████████████████████████████████████████████▊                     | 9372/12355 [00:04<00:01, 2015.24it/s]


Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 81%|███████████████████████████████████████████████████████████████████████                 | 9980/12355 [00:05<00:01, 2013.93it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 86%|██████████████████████████████████████████████████████████████████████████▌            | 10582/12355 [00:05<00:00, 1982.53it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 91%|██████████████████████████████████████████████████████████████████████████████▊        | 11198/12355 [00:05<00:00, 2026.57it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


 94%|█████████████████████████████████████████████████████████████████████████████████▋     | 11605/12355 [00:05<00:00, 2013.55it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity


100%|███████████████████████████████████████████████████████████████████████████████████████| 12355/12355 [00:06<00:00, 1952.46it/s]

Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity
Skipping entity





[i] Saving to output directory: output
[i] Using CPU
[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     38.50    0.00    0.00    0.00    0.00
  0     200         72.30   2485.37   74.89   76.97   72.93    0.75
  0     400         66.15   1302.31   85.83   81.75   90.33    0.86
  0     600         69.20   1264.43   87.27   82.45   92.68    0.87
  0     800         78.99   1260.53   90.43   85.88   95.49    0.90
  0    1000         80.60   1535.29   91.60   87.46   96.15    0.92
  0    1200         76.54   1666.45   92.43   87.77   97.61    0.92
  0    1400        104.16   1734.92   93.20   89.37   97.37    0.93
  0    1600        129.37   2146.94   93.45   89.83   97.38    0.93
  1    1800        157.49   2347.43   93.72   89.85   97.95    0.94
  1    2000        164.65   2564.

[[37  7]
 [ 3 36]]
              precision    recall  f1-score   support

           1       0.93      0.84      0.88        44
           0       0.84      0.92      0.88        39

    accuracy                           0.88        83
   macro avg       0.88      0.88      0.88        83
weighted avg       0.88      0.88      0.88        83

Total keywords:  385
Total Tweets:  12355
List of Keywords:

 ['cheese', 'chicken', 'milk', 'butter', 'cream', 'fruit', 'rice', 'water', 'garlic', 'bread', 'sugar', 'salt', 'chocolate', 'fish', 'beef', 'yogurt', 'almond milk', 'greens', 'granola', 'nuts', 'chia seeds', 'onions', 'apples', 'peanut butter', 'squash', 'soda', 'flour', 'garlic powder', 'buttermilk', 'potatoes', 'cottage cheese', 'sour cream', 'turkey', 'beans', 'cream cheese', 'bagel', 'pretzels', 'celery', 'white wine', 'margarine', 'whipped cream', 'ice cream', 'pineapple', 'whipped topping', 'plantains', 'veal', 'mustard', 'corned beef', 'rye', 'fried chicken', 'gravy', 'tuna', 'v

In [4]:
def getCommonVerbs():
    import en_core_web_sm
    nlp2 = spacy.load("en_core_web_sm")
    count = 0
    myVerbs = {}
    for i in range(10): #(len(training_data)):
        doc = nlp2(training_data[0][i])
        for token in doc:
            print(token, token.pos_)
            if token.pos_ == "VERB":
                if token.text in myVerbs:
                    myVerbs[token.text] = myVerbs[token.text] + 1
                else:
                    if token.text not in nlp2.Defaults.stop_words:
                        myVerbs[token.text] = 1
        
    print(count)
    topVerbs = dict(sorted(myVerbs.items(), key = lambda item: item[1], reverse=True)[:10])
    return [key for key in topVerbs]
print(getCommonVerbs())

@feelsosscatterd PROPN
BILLIE PROPN
IS AUX
BUTTER PROPN
? PUNCT
@coffeespoonie PROPN
Green PROPN
yogurt NOUN
smoothie NOUN
— PUNCT
yogurt PROPN
w/ PROPN
almond PROPN
milk NOUN
, PUNCT
fruit NOUN
, PUNCT
greens NOUN
. PUNCT
Blend PROPN
. PUNCT
Then ADV
add VERB
granola NOUN
, PUNCT
nuts NOUN
, PUNCT
chia ADJ
seeds NOUN
. PUNCT
Smoked ADJ
salmon NOUN
on ADP
sliced PROPN
cucumber PROPN
& CCONJ
amp PROPN
; PUNCT
whole ADJ
wheat NOUN
toast NOUN
w/ NOUN
sliced VERB
raw ADJ
purple NOUN
( PUNCT
red ADJ
) PUNCT
onions NOUN
& CCONJ
amp PROPN
; PUNCT
a DET
dash NOUN
of ADP
lemon NOUN
pepper NOUN
. PUNCT
Apples NOUN
or CCONJ
chocolate VERB
w/ NOUN
peanut NOUN
butter NOUN
. PUNCT
Jarred PROPN
peaches VERB
w/ PROPN
yogurt PROPN
. PUNCT
https://t.co/t7ouQvMU8 PROPN
m VERB
@LuLuRoche PROPN
Or CCONJ
2 X
. PUNCT
Mix VERB
the DET
egg NOUN
in ADP
milk NOUN
. PUNCT
In ADP
the DET
meantime NOUN
crush NOUN
up ADP
soda NOUN
crackers NOUN
with ADP
flour NOUN
. PUNCT
Crush VERB
the DET
crackers NOUN
really ADV


In [3]:
nlp = spacy.load(MODEL_PATH)
myTweet = "My chicken is tasty"
ent_recognize("chicken curry with coconut cream rice cake made with coconut water topped with salted egg sweet bean and candied baby coconut dessert topped with fresh shaved ice leche flan made with duck eggs and lime zest pastillas de leche made with carabao milk")
# x = nlp(myTweet)
# splitSent = myTweet.split()
# for i in range(len(x.ents)):
#     if str(x.ents[i]) in splitSent:
#         print(splitSent.index(str(x.ents[i])))
        
# print(x.doc.ents)
# print(splitSent)

In [4]:
#def findPreviousNextWord(tweets, keywords):
foodKeeperKeywords = foodKeeperInfo()
commonWords = {}
commonPairWords = {}
nlp2 = spacy.load("en_core_web_sm")
for i in range(len(training_data)):
    #Preprocess and remove stop words from each tweet
    myTweet = preProcess(training_data[0][i]).split()
    myTweet = [word for word in myTweet if not word in nlp2.Defaults.stop_words]
    
    for i in range(len(myTweet)):
        if myTweet[i] in foodKeeperKeywords:
            try:
                leftWord = myTweet[i-1]
                if leftWord in commonWords:
                    commonWords[leftWord] += 1
                else:
                    commonWords[leftWord] = 1
            except:
                pass
            try: 
                rightWord = myTweet[i+1]
                if rightWord in commonWords:
                    commonWords[rightWord] += 1
                else:
                    commonWords[rightWord] = 1
            except:
                pass
            try:
                if (leftWord, rightWord) in commonPairWords:
                    commonPairWords[(leftWord,rightWord)] += 1
                else:
                    commonPairWords[(leftWord,rightWord)] = 1
            except:
                pass

#commonWords are words that occur before and after a keyword
commonWords = dict(sorted(commonWords.items(), key = lambda item: item[1], reverse=True))
commonWords = [key for key in commonWords]

#commonPairWords are pairs or words that occur before and after a keyword
commonPairWords = dict(sorted(commonPairWords.items(), key = lambda item: item[1], reverse=True))
#print(training_data[0][1000])
#print(foodKeeperKeywords)

In [None]:
#print(commonWords[:25])
myIter= iter(commonPairWords)
for iter1 in iter(commonPairWords):
    print(iter1, commonPairWords[iter1])

In [80]:
import re
#count all words and adjacent
def findAdjacentWords(data,keywords):
    allWords = {}
    count = 0
    for tweet in data[0][:10]:
        splitTweet = preProcess(tweet).split()
        for i in range(len(splitTweet)):
            word = splitTweet[i]
            try:
                biword = word + " " + splitTweet[i + 1]
                if biword in keywords:
                    print("Biword:", biword)
                    continue
            except:
                pass
            try:
                triword = word + " " + splitTweet[i + 1] + " " + splitTweet[i + 2]
                if triword in keywords:
                    print("Triword:", triword)
                    continue
            except:
                pass
            
        
    
    '''mySent = "I am eating chicken for breakfast lunch and dinner tonight."
    mySearch = re.search('chicken', mySent)
    mySpan = mySearch.span()
    print(mySent[mySpan[0]:mySpan[1]])
    '''
findAdjacentWords(training_data, foodKeeperKeywords)

Biword: almond milk
Biword: chia seeds
Biword: peanut butter
Biword: peanut butter
Biword: cashew butter


# Test Data with 1.6 Million Tweet Dataset

In [13]:
# print(live_tweets[5][:10])
print(test_data['tweet'])

0     just microwaved a kashi chicken and spinach th...
1      thats really sad i wolud hate that! but i had...
2      and it took me my entire walk to the train st...
3     just finished cooking spag bol from scratch in...
4     oh noooooo kath is back from annual leave!!!!!...
                            ...                        
78    sick roomie gave me her cold my throats sore (...
79    no flying to ponca city today for breakfast oa...
80    just walked by marksandspencers food n didn't ...
81    if lucas till and taylor swift start dating i ...
82                             bed time back to reality
Name: tweet, Length: 83, dtype: object


# See what keywords are found by the created model

In [15]:
keywordsFound = []
nlp = spacy.load(MODEL_PATH)
# print(nlp.pipeline)
for tweet in live_tweets[5]: #test_data['tweet']:
    modeledTweet = nlp(preProcess(tweet))
    for token in modeledTweet.doc.ents:
        if str(token) in keywordsFound: continue
        keywordsFound.append(str(token))

# If the model finds food keywords print the Tweet
To help visualize which keywords are being found this loop iterates through the Tweets testing the model to see what keywords it finds. If it finds a keyword in the Tweet it will print the Tweet and highlight the keyword

In [14]:
for tweet in live_tweets[5]: #test_data['tweet'][:500]:
    ents = nlp(preProcess(tweet))
    #if ents.doc.ents:
    ent_recognize(preProcess(tweet))

KeyboardInterrupt: 

# Old Results (Don't Change Unless on Purpose!)

In [None]:
foodK1 = foodKeeperInfo()
notInFoodKeeper = []
inFoodKeeper = []
for keyword in keywordsFound:
    if keyword not in foodK1 and keyword not in notInFoodKeeper: notInFoodKeeper.append(keyword)
    elif keyword in foodK1 and keyword not in inFoodKeeper: inFoodKeeper.append(keyword)
        
print(len(notInFoodKeeper), len(inFoodKeeper))
print(inFoodKeeper)
#Normal Process

In [16]:
foodK1 = foodKeeperInfo()
notInFoodKeeper = []
inFoodKeeper = []
for keyword in keywordsFound:
    if keyword not in foodK1 and keyword not in notInFoodKeeper: notInFoodKeeper.append(keyword)
    elif keyword in foodK1 and keyword not in inFoodKeeper: inFoodKeeper.append(keyword)
        
print(len(notInFoodKeeper), len(inFoodKeeper))

10443 316
