# Package Installation

In [1]:
pip install gradio

Collecting gradio
  Downloading gradio-2.9.4-py3-none-any.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 4.3 MB/s 
[?25hCollecting fastapi
  Downloading fastapi-0.75.2-py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 1.8 MB/s 
[?25hCollecting python-multipart
  Downloading python-multipart-0.0.5.tar.gz (32 kB)
Collecting paramiko
  Downloading paramiko-2.10.3-py2.py3-none-any.whl (211 kB)
[K     |████████████████████████████████| 211 kB 65.7 MB/s 
Collecting orjson
  Downloading orjson-3.6.8-cp37-cp37m-manylinux_2_24_x86_64.whl (253 kB)
[K     |████████████████████████████████| 253 kB 51.4 MB/s 
[?25hCollecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 43.5 MB/s 
[?25hCollecting pycryptodome
  Downloading pycryptodome-3.14.1-cp35-abi3-manylinux2010_x86_64.whl (2.0 MB)
[K     |███████████████

In [2]:
# Download spaCy's large English model package (imported below as `en_core_web_lg`).
!python -m spacy download en_core_web_lg

Collecting en_core_web_lg==2.2.5
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-2.2.5/en_core_web_lg-2.2.5.tar.gz (827.9 MB)
[K     |████████████████████████████████| 827.9 MB 1.2 MB/s 
Building wheels for collected packages: en-core-web-lg
  Building wheel for en-core-web-lg (setup.py) ... [?25l[?25hdone
  Created wheel for en-core-web-lg: filename=en_core_web_lg-2.2.5-py3-none-any.whl size=829180942 sha256=376288a5bbd055876c28a03b2fc780c7fa4aa02c13f7f0b18c83c66515826337
  Stored in directory: /tmp/pip-ephem-wheel-cache-7exshupd/wheels/11/95/ba/2c36cc368c0bd339b44a791c2c1881a1fb714b78c29a4cb8f5
Successfully built en-core-web-lg
Installing collected packages: en-core-web-lg
Successfully installed en-core-web-lg-2.2.5
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_lg')


# Import Packages

In [3]:
import numpy as np
import pandas as pd
import gradio as gr
import warnings
from pathlib import Path
import tensorflow as tf
import os.path
import matplotlib.pyplot as plt
from keras.models import load_model
import json
import spacy
import random
from spacy.tokens import Doc
from spacy.util import minibatch, compounding
import en_core_web_lg
import warnings
# Helper libraries
# NOTE(review): second alias for matplotlib.pyplot (already imported as `plt` above).
import matplotlib.pyplot as pl
import nltk
# Fetch the NLTK data sets this notebook imports from; `wordnet` and `stopwords`
# are needed by the lemmatizer and stop-word list used in multiword_tokenize.
nltk.download('reuters')
nltk.download('wordnet')
nltk.download('stopwords')
from nltk.corpus import reuters
from nltk.tokenize import MWETokenizer
from nltk import sent_tokenize, word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import spacy
# General-purpose spaCy pipeline used by food_tokenize.
# NOTE(review): the notebook downloads and imports en_core_web_lg, but the small
# model is what gets loaded here — confirm which one is intended.
nlp = spacy.load("en_core_web_sm")
from collections import Counter
# Silence every Python warning from this point on.
warnings.filterwarnings('ignore')

[nltk_data] Downloading package reuters to /root/nltk_data...
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


# Google Drive

In [4]:
from google.colab import drive
import sys

# Mount Google Drive into the Colab filesystem at /content/drive
drive.mount('/content/drive')

# Root folder of the mounted Drive. The trailing slash matters: file names are
# appended to it by plain string concatenation (abspath_curr + 'file.ext').
abspath_curr = '/content/drive/My Drive/'

Mounted at /content/drive


# Import Data

## Import recipe data

In [5]:
# Load annotations1.json from the Drive root. The encoding is stated explicitly
# so parsing does not depend on the platform's default text encoding.
with open(abspath_curr + 'annotations1.json', encoding='utf-8') as f:
    training_data = json.load(f)

In [6]:
# Remove the 'classes' entry and display it. The default keeps the cell safe to
# re-run: once the key is gone a second bare pop would raise KeyError.
training_data.pop('classes', None)

['FOOD']

## Import FDC data

In [7]:

# FDC tables, read from the Drive root held in `abspath_curr` (same convention as
# the annotations file) instead of repeating the path literal.
# protein_df columns read later: food_name, description, protein_value,
# fat_value, carbohydrate_value, fdc_id.
protein = pd.read_csv(abspath_curr + 'NLP_food_protein.csv')
protein_df = pd.DataFrame(protein)

# nutrient_df columns read later: fdc_id, name, amount, unit_name.
nutrient = pd.read_csv(abspath_curr + 'NLP_food_nutrient.csv')
nutrient_df = pd.DataFrame(nutrient)

# Loading Model

In [8]:
# Load the saved NER pipeline from the Drive root held in `abspath_curr`;
# nermodel() reads the `.ents` of the documents it produces.
ner_model = spacy.load(abspath_curr + 'ner_model')

# Functions

## Provide info based on input from FDC data

### Information extraction from nutrient data

In [9]:
def nutrientcheck(fdcid):
  """Build a text listing of the nutrients recorded for one food.

  Selects the rows of the module-level ``nutrient_df`` whose ``fdc_id`` equals
  ``fdcid`` and emits one "<name>: <amount><unit_name>." line per distinct
  nutrient name, using the first row seen for each name.

  Args:
    fdcid: Value compared against the ``fdc_id`` column.

  Returns:
    The lines concatenated into one string (each ends with a newline); an
    empty string when no row matches.
  """
  matches = nutrient_df[nutrient_df['fdc_id'] == fdcid]
  seen_names = []
  lines = []
  for row_pos in range(len(matches)):
    row = matches.iloc[row_pos]
    name = row['name']
    if name in seen_names:
      # Later rows repeating an already-listed nutrient are ignored.
      continue
    seen_names.append(name)
    lines.append("""{}: {}{}.\n""".format(name, row['amount'], row['unit_name']))
  return "".join(lines)

### Information extraction from protein data

In [10]:
def proteincheck(input):
  """Describe every FDC product whose name mentions one of the given foods.

  For each entry of ``input`` the module-level ``protein_df`` is searched for
  rows whose ``food_name`` contains that entry as a literal, case-insensitive
  substring. Every matching row contributes its description, its calorie
  figures (protein and carbohydrate values times 4, fat value times 9, and
  their sum) and the nutrient lines ``nutrientcheck`` returns for the row's
  ``fdc_id``.

  NOTE(review): the 4/9/4 factors presumably assume the *_value columns are
  grams — confirm against the data source.

  Args:
    input: Iterable of food-name strings.

  Returns:
    One string with a section per food, in input order; an empty string when
    ``input`` is empty.
  """
  sections = []
  for food in input:
    parts = ["""Information for {} products: \n\n""".format(food)]
    # regex=False: the names come from free text and may contain regex
    # metacharacters such as "(" or "+", which would otherwise raise.
    # na=False: rows with a missing food_name never match, instead of
    # yielding a NaN mask that pandas refuses to index with.
    mask = protein_df['food_name'].str.contains(food, case=False, regex=False, na=False)
    foodlist = protein_df.loc[mask]
    for row_pos in range(len(foodlist)):
      item = foodlist.iloc[row_pos]
      proteinval = item['protein_value'] * 4
      fatval = item['fat_value'] * 9
      carbonval = item['carbohydrate_value'] * 4
      total = proteinval + fatval + carbonval
      parts.append("""Food Name: {}.\nTotal Calories: {}Kcal.\nProteins: {}Kcal.\nFat: {}Kcal.\nCarbonhydrate: {}Kcal.\n""".format(item['description'], total, proteinval, fatval, carbonval))
      parts.append(nutrientcheck(item['fdc_id']))
      parts.append("\n")
    sections.append("".join(parts))
  return "".join(sections)

## Word Tokenization

### Single Word Tokenization

In [11]:
def food_tokenize(document):
    """Return the lower-cased lemmas of the content words in ``document``.

    The text is run through the module-level spaCy pipeline ``nlp``. Tokens
    that are not purely alphabetic, and tokens flagged as stop words, are
    discarded; the lemma of every remaining token is lower-cased and kept in
    document order.
    """
    return [
        token.lemma_.lower()
        for token in nlp(document)
        if token.is_alpha and not token.is_stop
    ]

### Multiword Tokenization

In [12]:
def multiword_tokenize(doc, num_words):
    """Tokenize a food description, keeping its multi-word name as one token.

    Periods are stripped and every word is lemmatized with NLTK's
    ``WordNetLemmatizer``. Commas are then removed and the words listed in
    ``num_words`` are merged by ``MWETokenizer`` into a single token wherever
    they occur consecutively. Tokens are lower-cased; English stop words and
    tokens containing anything other than letters are dropped.

    Args:
        doc: The food description text.
        num_words: The words that make up the multi-word food name, in order.

    Returns:
        List of lower-cased tokens in document order.
    """
    stops = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    def _lemma(word):
        # Same normalization for the text and for the expression to merge.
        return lemmatizer.lemmatize(word.replace(".", ""))

    doc_lemma = " ".join(_lemma(word) for word in doc.split())
    num_words_lemma = [_lemma(word) for word in num_words]

    # Commas have to go before splitting: otherwise a token such as "butter,"
    # can never match the comma-free expression registered below.
    doc_nocomma = doc_lemma.replace(',', '')
    mwe = MWETokenizer([num_words_lemma])
    multi_analyzed = mwe.tokenize(doc_nocomma.split())

    multiword_list = []
    for multi_token in multi_analyzed:
        possible_add = multi_token.lower()
        # MWETokenizer glues merged words together with "_", which is not a
        # letter, so it is ignored for the alphabetic test. The stop-word list
        # is lower-case, hence the comparison on the lower-cased token.
        if possible_add.replace('_', '').isalpha() and possible_add not in stops:
            multiword_list.append(possible_add)
    return multiword_list

## Prediction Model Function

In [13]:
def nermodel(doc):
  """Return the text of every entity the module-level ``ner_model`` finds in ``doc``.

  Args:
    doc: Raw input text.

  Returns:
    List of entity strings, in the order the pipeline reports them.
  """
  return [entity.text for entity in ner_model(doc).ents]

In [14]:
def tokenize(doc):
  """Turn free text into a flat list of food tokens.

  Entities are extracted with ``nermodel``. For each entity, the phrase before
  its first comma is taken as the food name: a one-word name sends the whole
  entity through ``food_tokenize``, a longer name sends it through
  ``multiword_tokenize`` together with the name's words. Entities whose name
  phrase is empty contribute nothing.

  Args:
    doc: Raw input text.

  Returns:
    List of tokens from all entities, in entity order.
  """
  foods = []
  for food in nermodel(doc):
    # Words of the first comma-separated phrase, i.e. the food name.
    name_words = food.partition(',')[0].split()
    word_count = len(name_words)
    if word_count == 1:
      foods.extend(food_tokenize(food))
    elif word_count > 1:
      foods.extend(multiword_tokenize(food, name_words))
  return foods

## Text Recognition Function

In [15]:
def recipe(sentences):
  """Produce the full text report for a piece of input text.

  The foods found by ``tokenize`` are printed, listed on a header line, and
  followed by the product details ``proteincheck`` returns for them.

  Args:
    sentences: Raw input text.

  Returns:
    The header line plus the product information, as one string.
  """
  food_list = tokenize(sentences)
  print(food_list)
  recognized = ', '.join(food_list)
  return 'Food recognized from text: ' + recognized + '.\n\n' + proteincheck(food_list)



# Create Gradio Interface

In [16]:
# Text box that displays the report string returned by `recipe`.
outputs = gr.outputs.Textbox()

# One free-text input wired to `recipe`.
app = gr.Interface(
    fn=recipe,
    inputs=['text'],
    outputs=outputs,
    description="Food Information Extraction (Text Recognition)",
)

# Launch the Gradio Web App

In [17]:
# Sample sentence for trying the app by hand; it is not passed to `launch()`.
doc = "I would like celery and peanut butter today."
# Start serving the interface built above.
app.launch()

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://43964.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<fastapi.applications.FastAPI at 0x7f17685bd450>,
 'http://127.0.0.1:7860/',
 'https://43964.gradio.app')