### NOTE : The model you'll generate using this code and base data (foodcsv.xlsx) is already in this repo in folder FoodNER. You can use that directly in FoodItemRating Notebook. 

You can use this notebook if you have extracted more labelled data for custom NER to make the custom NER model better

In [3]:
from __future__ import unicode_literals,print_function
import pandas as pd
import plac
import random
from pathlib import Path
import spacy
from tqdm import tqdm

class TrainFoodNER():
    
    def __init__(self, df_path):
        self.df_path = df_path
        
    def generate_train_from_df(self):
        """This function generates the Train data in the format accepted by spacy from our csv file for training
           custom NER model and returns the training data variable created thereafter"""
        df = pd.read_excel(self.df_path)
        train = []
        gone = set()
        for index, value in df.iterrows():
            if index in gone:
                continue
            group = tuple()
            di = dict()
            li = []
            data = tuple()
            if pd.notnull(value['Start']) and pd.notnull(value['End']):
                data = data + (int(value['Start']),int(value['End']),value['Tag'].upper())
                li.append(data)
            c = index
            if ((c+1==df.shape[0])):
                break
            while(df.loc[c+1,'Review']==df.loc[index,'Review'] ):
                gone.add(c+1)
                data = tuple()
                if pd.notnull(df.loc[c+1,'Start']) and pd.notnull(df.loc[c+1,'End']):
                    data = data + (int(df.loc[c+1,'Start']),int(df.loc[c+1,'End']),df.loc[c+1,'Tag'].upper())
                    li.append(data)
                c = c+1
                if ((c+1==df.shape[0])):
                    break
            di['entities'] = li
            group = group + (value['Review'],di)
            train.append(group)
        return train

    def train_ner(self, output_dr):
        """This function trains the ner model and saves the model in directory provided as parameter to function"""
        """Parameters:
           output_dr -> Path where you want the model to be saved, must be string (Ex. '/users/xyz/desktop')
           """
        model = None
        output_dir=Path(output_dr)
        n_iter=100
        if model is not None:
            nlp = spacy.load(model)  # load existing spaCy model
            print("Loaded model '%s'" % model)
        else:
            nlp = spacy.blank('en')  # create blank Language class
            print("Created blank 'en' model")

        if 'ner' not in nlp.pipe_names:
            ner = nlp.create_pipe('ner')
            nlp.add_pipe(ner, last=True)
        # otherwise, get it so we can add labels
        else:
            ner = nlp.get_pipe('ner')

        # add labels
        TRAIN_DATA = self.generate_train_from_df()
        for _, annotations in TRAIN_DATA:
            for ent in annotations.get('entities'):
                ner.add_label(ent[2])

        other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
        with nlp.disable_pipes(*other_pipes):  # only train NER
            optimizer = nlp.begin_training()
            for itn in range(n_iter):
                random.shuffle(TRAIN_DATA)
                losses = {}
                print(itn)
                for text, annotations in tqdm(TRAIN_DATA):
                    nlp.update(
                        [text],  # batch of texts
                        [annotations],  # batch of annotations
                        drop=0.35,  # dropout - make it harder to memorise data
                        sgd=optimizer,  # callable to update weights
                        losses=losses)
                print(losses)
                if n_iter%10==0:
                    if output_dir is not None:
                        output_dir = Path(output_dir)
                        if not output_dir.exists():
                            output_dir.mkdir()
                        nlp.to_disk(output_dir)
                        print("Saved model to", output_dir)

        if output_dir is not None:
            output_dir = Path(output_dir)
            if not output_dir.exists():
                output_dir.mkdir()
            nlp.to_disk(output_dir)
            print("Saved model to", output_dir)
            
def generate_food_model(df_path,output_dir):
    """This function is the only one needed to be called to be called to generate FOOD NER model"""
    """Parameters:
       df_path : Path of excel file containing training data
       output_dir : Directory where you want the model to be saved (must be string, example : '/Users/XYZ/desktop')"""
    obj = TrainFoodNER(df_path = df_path)
    obj.train_ner(output_dir)

In [None]:
# Main and kinda only function to be called
# Path of data for model to be trained from (must be excel, else change in above class function if uploading csv),
# Path where you want to store the custom Model
generate_food_model('/Users/rishabh/Desktop/ProjectZomato/foodcsv.xlsx','/Users/rishabh/Desktop/ProjectZomato/FoodNER')

Created blank 'en' model


  ret = sqrt(sqnorm)
  0%|          | 1/316 [00:00<00:55,  5.68it/s]

0


100%|██████████| 316/316 [00:24<00:00, 12.67it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 949.6090647514052}
Saved model to /Users/rishabh/Desktop/ProjectZomato
1


100%|██████████| 316/316 [00:28<00:00, 10.91it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 597.3407825068917}
Saved model to /Users/rishabh/Desktop/ProjectZomato
2


100%|██████████| 316/316 [00:35<00:00,  8.93it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 400.19621750091596}
Saved model to /Users/rishabh/Desktop/ProjectZomato
3


100%|██████████| 316/316 [00:33<00:00,  9.36it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 319.493185696899}
Saved model to /Users/rishabh/Desktop/ProjectZomato
4


100%|██████████| 316/316 [00:45<00:00,  6.94it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 269.2614459972231}
Saved model to /Users/rishabh/Desktop/ProjectZomato
5


100%|██████████| 316/316 [01:23<00:00,  3.79it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 175.10473614891794}
Saved model to /Users/rishabh/Desktop/ProjectZomato
6


100%|██████████| 316/316 [01:22<00:00,  3.85it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 106.38820772487973}
Saved model to /Users/rishabh/Desktop/ProjectZomato
7


100%|██████████| 316/316 [01:21<00:00,  3.86it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 122.7609213058447}
Saved model to /Users/rishabh/Desktop/ProjectZomato
8


100%|██████████| 316/316 [01:22<00:00,  3.85it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 129.6804714860032}
Saved model to /Users/rishabh/Desktop/ProjectZomato
9


100%|██████████| 316/316 [01:19<00:00,  3.99it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 118.99109619411229}
Saved model to /Users/rishabh/Desktop/ProjectZomato
10


100%|██████████| 316/316 [01:19<00:00,  3.98it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 99.11831978045682}
Saved model to /Users/rishabh/Desktop/ProjectZomato
11


100%|██████████| 316/316 [01:18<00:00,  4.02it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 139.67257082049466}
Saved model to /Users/rishabh/Desktop/ProjectZomato
12


100%|██████████| 316/316 [01:18<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 114.85274772205406}
Saved model to /Users/rishabh/Desktop/ProjectZomato
13


100%|██████████| 316/316 [01:18<00:00,  4.01it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 112.53360010522957}
Saved model to /Users/rishabh/Desktop/ProjectZomato
14


100%|██████████| 316/316 [01:18<00:00,  4.02it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 128.39648306312432}
Saved model to /Users/rishabh/Desktop/ProjectZomato
15


100%|██████████| 316/316 [01:18<00:00,  4.02it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 103.6077574411466}
Saved model to /Users/rishabh/Desktop/ProjectZomato
16


100%|██████████| 316/316 [01:18<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 71.3498640418115}
Saved model to /Users/rishabh/Desktop/ProjectZomato
17


100%|██████████| 316/316 [01:18<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 60.085499893582224}
Saved model to /Users/rishabh/Desktop/ProjectZomato
18


100%|██████████| 316/316 [01:19<00:00,  3.99it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 114.03763593699506}
Saved model to /Users/rishabh/Desktop/ProjectZomato
19


100%|██████████| 316/316 [01:19<00:00,  3.99it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 100.87797078798249}
Saved model to /Users/rishabh/Desktop/ProjectZomato
20


100%|██████████| 316/316 [01:18<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 74.00581001244953}
Saved model to /Users/rishabh/Desktop/ProjectZomato
21


100%|██████████| 316/316 [01:19<00:00,  3.99it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 69.14695466296023}
Saved model to /Users/rishabh/Desktop/ProjectZomato
22


100%|██████████| 316/316 [01:19<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 84.59693892893418}
Saved model to /Users/rishabh/Desktop/ProjectZomato
23


100%|██████████| 316/316 [01:19<00:00,  3.99it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 70.70066428475565}
Saved model to /Users/rishabh/Desktop/ProjectZomato
24


100%|██████████| 316/316 [01:19<00:00,  3.99it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 110.66410517989887}
Saved model to /Users/rishabh/Desktop/ProjectZomato
25


100%|██████████| 316/316 [01:19<00:00,  3.98it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 80.89746779870431}
Saved model to /Users/rishabh/Desktop/ProjectZomato
26


100%|██████████| 316/316 [01:19<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 95.61266740655049}
Saved model to /Users/rishabh/Desktop/ProjectZomato
27


100%|██████████| 316/316 [01:19<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 56.777571783831}
Saved model to /Users/rishabh/Desktop/ProjectZomato
28


100%|██████████| 316/316 [01:18<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 70.08030522733915}
Saved model to /Users/rishabh/Desktop/ProjectZomato
29


100%|██████████| 316/316 [01:19<00:00,  3.98it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 51.615746185409584}
Saved model to /Users/rishabh/Desktop/ProjectZomato
30


100%|██████████| 316/316 [01:19<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 62.30030214084212}
Saved model to /Users/rishabh/Desktop/ProjectZomato
31


100%|██████████| 316/316 [01:19<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 56.14033001245959}
Saved model to /Users/rishabh/Desktop/ProjectZomato
32


100%|██████████| 316/316 [01:19<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 43.99995161591251}
Saved model to /Users/rishabh/Desktop/ProjectZomato
33


100%|██████████| 316/316 [01:19<00:00,  3.97it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 47.73168117415115}
Saved model to /Users/rishabh/Desktop/ProjectZomato
34


100%|██████████| 316/316 [01:19<00:00,  3.97it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 49.21666307440156}
Saved model to /Users/rishabh/Desktop/ProjectZomato
35


100%|██████████| 316/316 [01:19<00:00,  3.97it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 58.60152168550228}
Saved model to /Users/rishabh/Desktop/ProjectZomato
36


100%|██████████| 316/316 [01:19<00:00,  3.99it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 69.3839611690782}
Saved model to /Users/rishabh/Desktop/ProjectZomato
37


100%|██████████| 316/316 [01:19<00:00,  3.97it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 44.18079436816072}
Saved model to /Users/rishabh/Desktop/ProjectZomato
38


100%|██████████| 316/316 [43:20<00:00,  8.23s/it]   
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 37.56568052183313}
Saved model to /Users/rishabh/Desktop/ProjectZomato
39


100%|██████████| 316/316 [28:25<00:00,  5.40s/it]   
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 45.544527568708794}
Saved model to /Users/rishabh/Desktop/ProjectZomato
40


100%|██████████| 316/316 [01:23<00:00,  3.78it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 39.442914684171}
Saved model to /Users/rishabh/Desktop/ProjectZomato
41


100%|██████████| 316/316 [01:22<00:00,  3.83it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 52.39571330150808}
Saved model to /Users/rishabh/Desktop/ProjectZomato
42


100%|██████████| 316/316 [01:22<00:00,  3.82it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 56.60905537883906}
Saved model to /Users/rishabh/Desktop/ProjectZomato
43


100%|██████████| 316/316 [01:22<00:00,  3.83it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 35.59700479765862}
Saved model to /Users/rishabh/Desktop/ProjectZomato
44


100%|██████████| 316/316 [01:26<00:00,  3.64it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 45.31899827366481}
Saved model to /Users/rishabh/Desktop/ProjectZomato
45


100%|██████████| 316/316 [01:22<00:00,  3.83it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 38.942578666744595}
Saved model to /Users/rishabh/Desktop/ProjectZomato
46


100%|██████████| 316/316 [01:19<00:00,  3.97it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 17.664871377126467}
Saved model to /Users/rishabh/Desktop/ProjectZomato
47


100%|██████████| 316/316 [01:19<00:00,  3.96it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 41.766992638411296}
Saved model to /Users/rishabh/Desktop/ProjectZomato
48


100%|██████████| 316/316 [01:21<00:00,  3.85it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 57.25990131555286}
Saved model to /Users/rishabh/Desktop/ProjectZomato
49


100%|██████████| 316/316 [01:25<00:00,  3.70it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 33.507257504497474}
Saved model to /Users/rishabh/Desktop/ProjectZomato
50


100%|██████████| 316/316 [01:19<00:00,  4.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 21.691243101185556}
Saved model to /Users/rishabh/Desktop/ProjectZomato
51


100%|██████████| 316/316 [02:35<00:00,  2.04it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 28.139464272951454}
Saved model to /Users/rishabh/Desktop/ProjectZomato
52


100%|██████████| 316/316 [01:31<00:00,  3.44it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 34.55531734417773}
Saved model to /Users/rishabh/Desktop/ProjectZomato
53


100%|██████████| 316/316 [01:22<00:00,  3.82it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 17.44986670484648}
Saved model to /Users/rishabh/Desktop/ProjectZomato
54


100%|██████████| 316/316 [01:21<00:00,  3.86it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 15.773773676125472}
Saved model to /Users/rishabh/Desktop/ProjectZomato
55


100%|██████████| 316/316 [01:21<00:00,  3.89it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 16.811416364028272}
Saved model to /Users/rishabh/Desktop/ProjectZomato
56


100%|██████████| 316/316 [01:31<00:00,  3.44it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 29.45405853188348}
Saved model to /Users/rishabh/Desktop/ProjectZomato
57


100%|██████████| 316/316 [01:26<00:00,  3.64it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 37.08534667156425}
Saved model to /Users/rishabh/Desktop/ProjectZomato
58


100%|██████████| 316/316 [01:49<00:00,  2.89it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 45.37545682494408}
Saved model to /Users/rishabh/Desktop/ProjectZomato
59


100%|██████████| 316/316 [01:24<00:00,  3.74it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 42.569562614233575}
Saved model to /Users/rishabh/Desktop/ProjectZomato
60


100%|██████████| 316/316 [01:30<00:00,  3.51it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 41.066555131742525}
Saved model to /Users/rishabh/Desktop/ProjectZomato
61


100%|██████████| 316/316 [01:47<00:00,  2.94it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 18.179007187697845}
Saved model to /Users/rishabh/Desktop/ProjectZomato
62


100%|██████████| 316/316 [01:30<00:00,  3.51it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 29.440961600485355}
Saved model to /Users/rishabh/Desktop/ProjectZomato
63


100%|██████████| 316/316 [01:24<00:00,  3.75it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 36.25377514444109}
Saved model to /Users/rishabh/Desktop/ProjectZomato
64


100%|██████████| 316/316 [01:22<00:00,  3.84it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 30.693713040779247}
Saved model to /Users/rishabh/Desktop/ProjectZomato
65


100%|██████████| 316/316 [01:23<00:00,  3.76it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 8.916052424231822}
Saved model to /Users/rishabh/Desktop/ProjectZomato
66


100%|██████████| 316/316 [01:19<00:00,  3.97it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 54.635843603023304}
Saved model to /Users/rishabh/Desktop/ProjectZomato
67


100%|██████████| 316/316 [01:19<00:00,  3.95it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 19.922772502476782}
Saved model to /Users/rishabh/Desktop/ProjectZomato
68


100%|██████████| 316/316 [01:20<00:00,  3.92it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 23.32165541880214}
Saved model to /Users/rishabh/Desktop/ProjectZomato
69


100%|██████████| 316/316 [01:23<00:00,  3.80it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 55.59195332109655}
Saved model to /Users/rishabh/Desktop/ProjectZomato
70


100%|██████████| 316/316 [01:22<00:00,  3.81it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 28.73727503705179}
Saved model to /Users/rishabh/Desktop/ProjectZomato
71


100%|██████████| 316/316 [01:24<00:00,  3.74it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 38.88289358503635}
Saved model to /Users/rishabh/Desktop/ProjectZomato
72


100%|██████████| 316/316 [01:22<00:00,  3.82it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 10.52453076314381}
Saved model to /Users/rishabh/Desktop/ProjectZomato
73


100%|██████████| 316/316 [01:35<00:00,  3.31it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 9.156681433605582}
Saved model to /Users/rishabh/Desktop/ProjectZomato
74


100%|██████████| 316/316 [02:14<00:00,  2.35it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 17.69021366035576}
Saved model to /Users/rishabh/Desktop/ProjectZomato
75


100%|██████████| 316/316 [01:44<00:00,  3.03it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 18.67408517434166}
Saved model to /Users/rishabh/Desktop/ProjectZomato
76


100%|██████████| 316/316 [01:24<00:00,  3.76it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 42.318157773716244}
Saved model to /Users/rishabh/Desktop/ProjectZomato
77


100%|██████████| 316/316 [01:21<00:00,  3.88it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 24.935439814403075}
Saved model to /Users/rishabh/Desktop/ProjectZomato
78


100%|██████████| 316/316 [01:36<00:00,  3.28it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 16.42716622206311}
Saved model to /Users/rishabh/Desktop/ProjectZomato
79


100%|██████████| 316/316 [01:36<00:00,  3.29it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 32.70123484758959}
Saved model to /Users/rishabh/Desktop/ProjectZomato
80


100%|██████████| 316/316 [01:34<00:00,  3.33it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 24.643457840244732}
Saved model to /Users/rishabh/Desktop/ProjectZomato
81


100%|██████████| 316/316 [01:35<00:00,  3.32it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 37.66389677819797}
Saved model to /Users/rishabh/Desktop/ProjectZomato
82


100%|██████████| 316/316 [02:01<00:00,  2.59it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 23.485638729883153}
Saved model to /Users/rishabh/Desktop/ProjectZomato
83


100%|██████████| 316/316 [01:56<00:00,  2.71it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 11.656191329620233}
Saved model to /Users/rishabh/Desktop/ProjectZomato
84


100%|██████████| 316/316 [01:22<00:00,  3.83it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 22.323168017723596}
Saved model to /Users/rishabh/Desktop/ProjectZomato
85


100%|██████████| 316/316 [01:23<00:00,  3.79it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 22.331312921602578}
Saved model to /Users/rishabh/Desktop/ProjectZomato
86


100%|██████████| 316/316 [01:32<00:00,  3.42it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 52.2849338938207}
Saved model to /Users/rishabh/Desktop/ProjectZomato
87


100%|██████████| 316/316 [01:33<00:00,  3.38it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 32.24430942819524}
Saved model to /Users/rishabh/Desktop/ProjectZomato
88


100%|██████████| 316/316 [01:45<00:00,  3.00it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 18.831979463981767}
Saved model to /Users/rishabh/Desktop/ProjectZomato
89


100%|██████████| 316/316 [01:30<00:00,  3.51it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 18.031419631554943}
Saved model to /Users/rishabh/Desktop/ProjectZomato
90


100%|██████████| 316/316 [01:32<00:00,  3.43it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 7.248129414602318}
Saved model to /Users/rishabh/Desktop/ProjectZomato
91


100%|██████████| 316/316 [01:27<00:00,  3.60it/s]


{'ner': 15.616913405228782}


  0%|          | 0/316 [00:00<?, ?it/s]

Saved model to /Users/rishabh/Desktop/ProjectZomato
92


100%|██████████| 316/316 [01:26<00:00,  3.66it/s]


{'ner': 34.04200002487114}


  0%|          | 0/316 [00:00<?, ?it/s]

Saved model to /Users/rishabh/Desktop/ProjectZomato
93


100%|██████████| 316/316 [01:35<00:00,  3.30it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 24.609552644879965}
Saved model to /Users/rishabh/Desktop/ProjectZomato
94


100%|██████████| 316/316 [01:30<00:00,  3.49it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 35.69911901406183}
Saved model to /Users/rishabh/Desktop/ProjectZomato
95


100%|██████████| 316/316 [01:42<00:00,  3.09it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 40.749161564783215}
Saved model to /Users/rishabh/Desktop/ProjectZomato
96


100%|██████████| 316/316 [01:47<00:00,  2.94it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 6.782341316921996}
Saved model to /Users/rishabh/Desktop/ProjectZomato
97


100%|██████████| 316/316 [01:29<00:00,  3.51it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 23.575507888034096}
Saved model to /Users/rishabh/Desktop/ProjectZomato
98


100%|██████████| 316/316 [01:27<00:00,  3.62it/s]
  0%|          | 0/316 [00:00<?, ?it/s]

{'ner': 11.026393699172496}
Saved model to /Users/rishabh/Desktop/ProjectZomato
99


100%|██████████| 316/316 [01:26<00:00,  3.67it/s]


{'ner': 27.150179142321708}
Saved model to /Users/rishabh/Desktop/ProjectZomato
Saved model to /Users/rishabh/Desktop/ProjectZomato


# Custom NER performance

Since I only have 316 examples, I haven't split the dataset before into train and validation. So the metric scores we'll be getting will be on train data below. I have also retrained the model again dividing data into train and validation (files foodcsvtrain.xlsx and foodcsvtest.xlsx) which will be right after train precision calculated score

I will be using **Precision Score** as the main metric for model evaluation. 

This is so because I am more interested to find out how many of the named entities is my model mis-classifying as being a food item. Anything classified multiple times as a food item with high rating will be shown to any end user and obviously you don't want it to output things like 'CP', 'Kirti Nagar Momos', 'Barbeque Nation'. If intuition of using precision is still not clear enough to you, take an example : A food item which is not being recognised as a food item will not affect our output that much since if it's trending or that good, there will be a lot of reviews having that and chances of our NER model recognising it increases. However a word or a Group of words recognised as food item which essentially are not will look wrongfully put in the output.

In [4]:
def get_precision(model_path,train_path):
    """The function takes in model path of Custom Food NER and file path of data it was trained from and returns
    the precision score of the model on train data"""
    nlp = spacy.load(model_path)
    tp=0
    fp=0
    obj = TrainFoodNER('/Users/rishabh/Desktop/ProjectZomato/foodcsv.xlsx')
    TRAIN_DATA = obj.generate_train_from_df()
    for items in TRAIN_DATA:
        doc = nlp(items[0])
        k_list = []
        for its in items[1]['entities']:
            k_list.append(items[0][its[0]:its[1]])
        for tokens in doc.ents:
            if tokens.text in k_list:
                tp+=1
            else:
                fp+=1
    return tp/(tp+fp)

In [36]:
get_precision('/Users/rishabh/Desktop/ProjectZomato/FoodNER','/Users/rishabh/Desktop/ProjectZomato/foodcsv.xlsx')

0.9097222222222222

A precision of 0.90 is good but two things are to be kept in mind, it's on train data, it can be so that the model is overfitting. To find this out I can divide 316 examples I trained on above into train and validation, construct model on train and evaluate on validation which I have done below.

## Let's try to train it on 245 examples and validate on remaining 66 examples:

In [None]:
generate_food_model('/Users/rishabh/Desktop/ProjectZomato/foodcsvtrain.xlsx','/Users/rishabh/Desktop/ProjectZomato/trainNER')

Created blank 'en' model


  ret = sqrt(sqnorm)
  0%|          | 1/245 [00:00<00:42,  5.74it/s]

0


100%|██████████| 245/245 [00:20<00:00, 12.24it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 754.4667709981615}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
1


100%|██████████| 245/245 [00:31<00:00,  7.86it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 492.61260801699046}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
2


100%|██████████| 245/245 [00:25<00:00,  9.49it/s]
  0%|          | 1/245 [00:00<00:28,  8.59it/s]

{'ner': 426.1664253400353}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
3


100%|██████████| 245/245 [00:26<00:00,  9.29it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 303.4432270535632}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
4


100%|██████████| 245/245 [00:22<00:00, 10.89it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 237.98554882802455}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
5


100%|██████████| 245/245 [00:28<00:00,  8.59it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 186.91682110726083}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
6


100%|██████████| 245/245 [00:53<00:00,  4.57it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 124.46890507200698}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
7


100%|██████████| 245/245 [01:12<00:00,  3.38it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 112.65537614249038}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
8


100%|██████████| 245/245 [01:16<00:00,  3.19it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 85.0363549892019}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
9


100%|██████████| 245/245 [01:22<00:00,  2.97it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 67.12187198191972}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
10


100%|██████████| 245/245 [01:15<00:00,  3.26it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 76.77383963822652}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
11


100%|██████████| 245/245 [01:12<00:00,  3.37it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 56.90762306307647}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
12


100%|██████████| 245/245 [01:10<00:00,  3.47it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 80.7435216082795}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
13


100%|██████████| 245/245 [01:10<00:00,  3.49it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 99.13243921382923}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
14


100%|██████████| 245/245 [01:08<00:00,  3.58it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 97.1233011848562}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
15


100%|██████████| 245/245 [01:08<00:00,  3.56it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 79.42214599330006}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
16


100%|██████████| 245/245 [01:31<00:00,  2.67it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 80.07245702789614}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
17


100%|██████████| 245/245 [01:17<00:00,  3.15it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 113.32999012985084}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
18


100%|██████████| 245/245 [01:19<00:00,  3.07it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 62.78786989157245}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
19


100%|██████████| 245/245 [01:10<00:00,  3.47it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 79.57962307835001}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
20


100%|██████████| 245/245 [01:08<00:00,  3.58it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 74.49107160281041}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
21


100%|██████████| 245/245 [01:09<00:00,  3.54it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 62.13786417330621}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
22


100%|██████████| 245/245 [01:10<00:00,  3.46it/s]


{'ner': 55.639988055487336}


  0%|          | 0/245 [00:00<?, ?it/s]

Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
23


100%|██████████| 245/245 [01:21<00:00,  3.01it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 79.27105247120333}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
24


100%|██████████| 245/245 [01:07<00:00,  3.62it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 51.75858443650964}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
25


100%|██████████| 245/245 [01:09<00:00,  3.51it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 63.480869945405814}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
26


100%|██████████| 245/245 [01:21<00:00,  3.01it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 45.128626307220216}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
27


100%|██████████| 245/245 [01:11<00:00,  3.43it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 67.13778658674913}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
28


100%|██████████| 245/245 [01:19<00:00,  3.10it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 48.91419890111815}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
29


100%|██████████| 245/245 [01:10<00:00,  3.47it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 42.227527116822536}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
30


100%|██████████| 245/245 [01:17<00:00,  3.18it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 60.63340860428047}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
31


100%|██████████| 245/245 [01:10<00:00,  3.45it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 43.49311323165101}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
32


100%|██████████| 245/245 [01:14<00:00,  3.27it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 61.64658419531899}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
33


100%|██████████| 245/245 [01:16<00:00,  3.19it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 44.48403385142401}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
34


100%|██████████| 245/245 [01:22<00:00,  2.99it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 71.61980004081065}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
35


100%|██████████| 245/245 [01:19<00:00,  3.07it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 47.25201814764113}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
36


100%|██████████| 245/245 [01:05<00:00,  3.75it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 56.99664870982927}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
37


100%|██████████| 245/245 [01:04<00:00,  3.79it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 32.37837358301699}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
38


100%|██████████| 245/245 [01:04<00:00,  3.77it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 50.294420570220176}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
39


100%|██████████| 245/245 [01:05<00:00,  3.76it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 31.55434378667148}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
40


100%|██████████| 245/245 [01:05<00:00,  3.72it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 58.492046012669874}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
41


100%|██████████| 245/245 [01:05<00:00,  3.76it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 15.811096172331329}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
42


100%|██████████| 245/245 [01:05<00:00,  3.73it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 23.941733084931812}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
43


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 29.334867115640623}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
44


100%|██████████| 245/245 [01:05<00:00,  3.73it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 29.30754206564195}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
45


100%|██████████| 245/245 [01:05<00:00,  3.77it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 38.77508390546532}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
46


100%|██████████| 245/245 [01:05<00:00,  3.73it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 40.23039197717104}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
47


100%|██████████| 245/245 [01:05<00:00,  3.75it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 64.49285024993054}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
48


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 46.76876641657235}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
49


100%|██████████| 245/245 [01:06<00:00,  3.71it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 43.687394273023806}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
50


100%|██████████| 245/245 [01:04<00:00,  3.77it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 24.229295718428236}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
51


100%|██████████| 245/245 [01:05<00:00,  3.76it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 28.844854889407195}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
52


100%|██████████| 245/245 [01:05<00:00,  3.77it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 17.63896968308447}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
53


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 36.03718729221527}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
54


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 34.858116304449936}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
55


100%|██████████| 245/245 [01:05<00:00,  3.73it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 15.219444864564254}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
56


100%|██████████| 245/245 [01:05<00:00,  3.71it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 6.8047408965742875}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
57


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 21.44180977272732}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
58


100%|██████████| 245/245 [01:05<00:00,  3.72it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 15.223599688454133}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
59


100%|██████████| 245/245 [01:06<00:00,  3.70it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 13.356838815148842}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
60


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 36.983693005283}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
61


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 44.852007434984024}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
62


100%|██████████| 245/245 [01:06<00:00,  3.70it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 44.24436371471139}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
63


100%|██████████| 245/245 [01:06<00:00,  3.71it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 19.46371435752305}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
64


100%|██████████| 245/245 [01:05<00:00,  3.72it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 21.625535589855062}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
65


100%|██████████| 245/245 [01:05<00:00,  3.73it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 38.049478136197536}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
66


100%|██████████| 245/245 [01:05<00:00,  3.72it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 19.378721778991874}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
67


100%|██████████| 245/245 [01:06<00:00,  3.71it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 28.952881163546543}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
68


100%|██████████| 245/245 [01:06<00:00,  3.69it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 28.23950502172402}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
69


100%|██████████| 245/245 [01:06<00:00,  3.71it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 24.91977150420249}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
70


100%|██████████| 245/245 [01:05<00:00,  3.74it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 34.3339095276058}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
71


100%|██████████| 245/245 [01:21<00:00,  3.01it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 20.79087176882516}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
72


100%|██████████| 245/245 [01:13<00:00,  3.35it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 25.016982964048275}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
73


100%|██████████| 245/245 [01:15<00:00,  3.26it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 11.887979047342267}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
74


100%|██████████| 245/245 [01:11<00:00,  3.42it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 20.827286099569754}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
75


100%|██████████| 245/245 [01:14<00:00,  3.30it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 23.312346185189245}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
76


100%|██████████| 245/245 [01:07<00:00,  3.62it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 20.673176465846733}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
77


100%|██████████| 245/245 [01:07<00:00,  3.61it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 17.829429860032445}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
78


100%|██████████| 245/245 [01:10<00:00,  3.49it/s]


{'ner': 19.892557387229274}


  0%|          | 0/245 [00:00<?, ?it/s]

Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
79


100%|██████████| 245/245 [01:14<00:00,  3.29it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 18.71100295103869}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
80


100%|██████████| 245/245 [01:12<00:00,  3.39it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 18.035216465760385}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
81


100%|██████████| 245/245 [01:08<00:00,  3.56it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 33.90571349471999}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
82


100%|██████████| 245/245 [01:10<00:00,  3.48it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 11.235739882388547}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
83


100%|██████████| 245/245 [01:10<00:00,  3.50it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 16.575881089950283}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
84


100%|██████████| 245/245 [01:15<00:00,  3.24it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 22.506388963879218}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
85


100%|██████████| 245/245 [01:19<00:00,  3.07it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 12.794615922445177}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
86


100%|██████████| 245/245 [01:10<00:00,  3.48it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 19.742744849123046}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
87


100%|██████████| 245/245 [01:15<00:00,  3.24it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 39.978200340464234}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
88


100%|██████████| 245/245 [01:11<00:00,  3.44it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 12.495589762953738}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
89


100%|██████████| 245/245 [01:17<00:00,  3.17it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 18.413361531996433}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
90


100%|██████████| 245/245 [01:22<00:00,  2.98it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 30.65964632246064}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
91


100%|██████████| 245/245 [01:13<00:00,  3.34it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 26.040537965836794}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
92


100%|██████████| 245/245 [01:12<00:00,  3.40it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 9.663942622750726}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
93


100%|██████████| 245/245 [01:11<00:00,  3.43it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 24.022594165286765}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
94


100%|██████████| 245/245 [01:08<00:00,  3.56it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 28.9309704211467}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
95


100%|██████████| 245/245 [01:08<00:00,  3.60it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 25.641669317577286}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
96


100%|██████████| 245/245 [01:07<00:00,  3.61it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 15.517493143371455}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
97


100%|██████████| 245/245 [01:10<00:00,  3.49it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 25.161231756501675}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
98


100%|██████████| 245/245 [01:07<00:00,  3.65it/s]
  0%|          | 0/245 [00:00<?, ?it/s]

{'ner': 15.362498195109108}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
99


100%|██████████| 245/245 [01:07<00:00,  3.64it/s]


{'ner': 17.557805938945133}
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER
Saved model to /Users/rishabh/Desktop/ProjectZomato/trainNER


In [5]:
get_precision('/Users/rishabh/Desktop/ProjectZomato/trainNER','/Users/rishabh/Desktop/ProjectZomato/foodcsvtest.xlsx')

0.8337236533957846

A precision of 83% on validation is decent enough I would say, spacy did a fantastic job with just 245 samples for training. Although keep in mind These samples should be increased to make model better and more robust. It might have been a sampling skew from my manual data collection that I only picked training examples of certain kind. And obviously 245 examples cannot be close to representing all kinds of reviews on zomato. But nevertheless it's still better. I will be using model trained on entire data and not just train and validation data.