In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats as st
from os import listdir, mkdir
from bs4 import BeautifulSoup
import requests
import hashlib
import urllib.request
import json
from IPython.display import Image
from IPython.core.display import HTML 

from foodpal import get_from_page
from weightloss import get_from_url
from fddb import get_info_from_url, get_urls_from_groups

# FDDB

In [2]:
# Resolve the dish URL list once and reuse it. The original called
# get_urls_from_groups twice with the same argument just to index element 15,
# repeating the whole lookup (presumably a scrape of the group index -- confirm in fddb.py).
dish_urls = get_urls_from_groups('https://fddb.info/db/en/groups/dishes/index.html')
dish_url = dish_urls[15]  # arbitrary sample dish used for the demo
print(dish_url)
print(get_info_from_url(dish_url))
# Show the image for the sample dish; the path matches 'path_default' in the printed record.
Image(url= "dish_f006a07a30e5d4ec3a3a7ff899b4f679/rgb.png")

https://fddb.info/db/en/food/baker_vegetable_honeycomb/index.html
{'dataset_info': 'fddb', 'dish_id': 'dish_f006a07a30e5d4ec3a3a7ff899b4f679', 'path_default': 'dish_f006a07a30e5d4ec3a3a7ff899b4f679/rgb.png', 'path_additional': None, 'mass': 200.0, 'kcal_total': 418.0, 'prot_total': 18.0, 'fat_total': 20.0, 'carb_total': 42.0, 'kcal_100': 209.0, 'prot_100': 9.0, 'fat_100': 10.0, 'carb_100': 21.0, 'ingridients': None, 'text': 'Vegetable honeycomb, Dishes', 'img_exist': 1, 'link': 'https://fddb.info/static/db/980/8NQB3KUQE7VU992HU2PWUU17.jpg', 'url': 'https://fddb.info/db/en/food/baker_vegetable_honeycomb/index.html'}


# foodpal

In [3]:
# Build a {category title: category URL} mapping from the four pages of the
# foodpal calorie table ('' is the first page, '/2'..'/4' the following ones).
categories = {}
for page_suffix in ['', '/2', '/3', '/4']:
    # A timeout keeps a stalled connection from hanging the kernel, and
    # raise_for_status() fails loudly instead of silently parsing an error page
    # (which would just yield no categories).
    response = requests.get('https://www.foodpal-app.com/en/calorie-table' + page_suffix, timeout=30)
    response.raise_for_status()
    page = BeautifulSoup(response.text, "html.parser")
    # Category tiles are the anchors carrying exactly this class string.
    for anchor in page.find_all('a', class_='d-block mb-4 reco'):
        categories[anchor.attrs['title']] = anchor.attrs['href']
categories

{'Animal fats': 'https://www.foodpal-app.com/en/calorie-table/animal-fats',
 'Baking ingredients': 'https://www.foodpal-app.com/en/calorie-table/baking-ingredients',
 'Bread, buns and pastries': 'https://www.foodpal-app.com/en/calorie-table/bread-buns-and-pastries',
 'Cakes and pies': 'https://www.foodpal-app.com/en/calorie-table/cakes-and-pies',
 'Canned fish': 'https://www.foodpal-app.com/en/calorie-table/canned-fish',
 'Canned fruit': 'https://www.foodpal-app.com/en/calorie-table/canned-fruit',
 'Canned vegetables': 'https://www.foodpal-app.com/en/calorie-table/canned-vegetables',
 'Cereal products': 'https://www.foodpal-app.com/en/calorie-table/cereal-products',
 'cheese': 'https://www.foodpal-app.com/en/calorie-table/cheese',
 'chocolate': 'https://www.foodpal-app.com/en/calorie-table/chocolate',
 'Coffee drinks': 'https://www.foodpal-app.com/en/calorie-table/coffee-drinks',
 'Cold cuts': 'https://www.foodpal-app.com/en/calorie-table/cold-cuts',
 'Confectionery': 'https://www.food

In [4]:
# Fetch the "Vegan dishes" category listing and pick its last product link.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(categories['Vegan dishes'], timeout=30)
response.raise_for_status()
page = BeautifulSoup(response.text, "html.parser")
# Product links are matched on their exact inline style string.
product_links = page.find_all(
    'a', style="color:#333;display:block;margin-top:2px;padding:2px 0px;border-bottom:1px solid #eaeaea;")
if not product_links:
    # Explicit failure instead of a bare IndexError from [-1] on an empty result.
    raise ValueError('no product links found on the "Vegan dishes" page; the site markup may have changed')
i = product_links[-1]  # kept as `i`: the following cell reads its href/title
i

<a href="https://www.foodpal-app.com/en/calorie-table/p/tofu-natural" style="color:#333;display:block;margin-top:2px;padding:2px 0px;border-bottom:1px solid #eaeaea;" title="Tofu (natural)">
<div class="hover"><div style="width:60px;float:left;"><img alt="Tofu (natural)" class="lazy img-fluid" data-src="https://www.foodpal-app.com/uploads/images/food/9446/tofu-natur-604a65311505b-50.webp" height="50" src="https://www.foodpal-app.com/datas/images/homepage/blank.svg" style="display:inline-block;margin-right:10px;height:50px;" title="Tofu (natural)" width="50"/></div><div style="float:left;width:calc(100% - 60px);"><div class="headline">Tofu (natural)</div><span style="font-size:0.7em;">Listed in the category Vegan dishes</span></div><div style="clear:both;"></div></div>
</a>

In [5]:
# Pull the product URL and display name off the anchor selected in the previous cell.
link_attrs = i.attrs
url = link_attrs['href']
name = link_attrs['title']

# Scrape the product record, then label it with the product name and its category.
dct = get_from_page(url, name)
dct.update(text=f'{name}, Vegan dishes')
print(dct)

# Show the product image; the path matches 'path_default' in the printed record.
Image(url="tofu-natur-604a65311505b-800.png")

{'dataset_info': 'foodpal', 'path_default': 'tofu-natur-604a65311505b-800.png', 'kcal_100': 162.0, 'prot_100': 17.0, 'fat_100': 9.0, 'carb_100': 0.0, 'text': 'Tofu (natural), Vegan dishes'}


# weightloss

In [6]:
# Build a {category title: [listing page URLs]} mapping for the weightloss.com.au recipes.
base_url = 'https://www.weightloss.com.au'

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(base_url + '/healthy-recipes/', timeout=30)
response.raise_for_status()
page = BeautifulSoup(response.text, "html.parser")

categories = {}
# NOTE(review): the [6:45] slice presumably selects the recipe-category links
# among all anchors on the page -- it is position-dependent; confirm against the live markup.
for anchor in page.find_all('a')[6:45]:
    if 'href' in anchor.attrs and 'title' in anchor.attrs:
        category_url = base_url + anchor.attrs['href']
        response = requests.get(category_url, timeout=30)
        response.raise_for_status()
        category_page = BeautifulSoup(response.text, "html.parser")

        # The first listing page is the category URL itself.
        categories[anchor.attrs['title']] = [category_url]

        # Look the pagination list up once (the original searched the page twice).
        pagination = category_page.find('ul', class_="pagination_link")
        if pagination is not None:
            page_urls = [base_url + '/' + link.attrs['href'].replace(' ', '')
                         for link in pagination.find_all('a')]
            # De-duplicate in first-seen order with dict.fromkeys rather than
            # pd.unique, which pandas deprecates for plain-list input; then drop
            # the first unique entry, exactly as the original [1:] did.
            categories[anchor.attrs['title']] += list(dict.fromkeys(page_urls))[1:]
categories

{'Dinner Recipes': ['https://www.weightloss.com.au/healthy-recipes/dinner-recipes/',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=24',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=48',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=72',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=96',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=120',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=144',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=168',
  'https://www.weightloss.com.au/healthy-recipes/dinner-recipes/?start=192'],
 'Lunch Recipes': ['https://www.weightloss.com.au/healthy-recipes/lunch-recipes/',
  'https://www.weightloss.com.au/healthy-recipes/lunch-recipes/?start=24',
  'https://www.weightloss.com.au/healthy-recipes/lunch-recipes/?start=48',
  'https://www.weightloss.com.au/healthy-recipes/lunch-recipes/?start

In [7]:
# Open the last listing page of "Soup Recipes" and take the first recipe on it.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(categories['Soup Recipes'][-1], timeout=30)
response.raise_for_status()
p = BeautifulSoup(response.text, "html.parser")

# find() returns None when nothing matches; check explicitly so a markup change
# produces a readable error rather than an AttributeError on None.
first_item = p.find('div', class_="LinkList__item")
dish = first_item.find('a', class_="LinkList__item__title") if first_item is not None else None
if dish is None:
    raise ValueError('no recipe link found on the "Soup Recipes" listing page; the site markup may have changed')

# The anchor's first child is used as the recipe name; its href is site-relative.
name, link = dish.contents[0], 'https://www.weightloss.com.au' + dish.attrs['href']
name, link

('Combination Chinese Soup',
 'https://www.weightloss.com.au/healthy-recipes/soup-recipes/combination-chinese-soup/')

In [8]:
# Download the recipe page selected in the previous cell.
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(link, timeout=30)
response.raise_for_status()
page = BeautifulSoup(response.text, "html.parser")

# Despite its name, get_from_url is given the parsed page, not the URL string.
dct = get_from_url(page)
# Prefix the scraped description with the category and the recipe name.
dct['text'] = f'Soup Recipes, {name}. {dct["text"]}'
print(dct)
# Show the recipe image; the path matches 'path_default' in the printed record.
Image(url= "combination-soup-lge.png")

{'path_default': 'combination-soup-lge.png', 'kcal_100': 51.932367149758456, 'prot_100': 4.468599033816425, 'fat_100': 0.38647342995169087, 'carb_100': 7.198067632850242, 'mass': 827.9999999999999, 'text': 'Soup Recipes, Combination Chinese Soup. Asian soups are traditionally broth based with no dairy. You can use any vegetables in your fridge with tender beef and noodles, which will give you a hearty soup.', 'dataset_info': 'weightloss', 'kcal': 430.0, 'prot': 37.0, 'fat': 3.2, 'carb': 59.6}
