In [1]:
import pandas as pd
import sqlite3

import sys, os

import utils.mining_data_tb as md
import utils.sql_tb as sq

import warnings

# Install a blanket "ignore" filter: no category or module is given, so it
# matches every warning raised after this point.
# NOTE(review): presumably meant to keep cell output uncluttered, but it also
# hides deprecation notices from pandas/sqlite3 — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [2]:
# Locations used by this cell; both are relative to the working directory.
DB_PATH = "health_canada.db"
CSV_DIR = "data/"

# Open the SQLite database file (sqlite3 creates it when it does not exist).
connection = sqlite3.connect(DB_PATH)
# Extract the data from the CSV files in CSV_DIR via the project helper.
tables = md.data_to_tables(CSV_DIR)
# Create the database tables from the extracted data.
# NOTE(review): the saved output of this cell reads "Something went wrong",
# presumably printed by one of the helpers — confirm the tables were written.
sq.tables_to_sql(connection, tables)

Something went wrong


Source: [SQLBolt – Order of execution of a query](https://sqlbolt.com/lesson/select_queries_order_of_execution)

# SQL STATEMENTS ORDER

## Writing order
[Image source](https://sqlbolt.com/lesson/select_queries_order_of_execution)
<img src="documents/writing_order.png" alt="SQL writing order diagram">

## Execution order
[Image source](https://learnsql.com/blog/sql-order-of-operations/)
<img src="documents/execution_order.png" alt="SQL execution order diagram">

In [3]:
# Example query: for every food, show its food group together with each of
# its yield amounts and the matching yield description.
# Every table gets its own explicit JOIN ... ON, so each join condition sits
# next to the table it introduces. Listing tables with commas and attaching a
# single trailing ON clause is not standard SQL and only works thanks to
# SQLite's permissive join grammar.
query = """
SELECT f.FoodDescription, fg.FoodGroupName, ya.YieldAmount, yn.YieldDescription
FROM food_name f
JOIN food_group fg ON f.FoodGroupID = fg.FoodGroupId
JOIN yield_amount ya ON f.FoodID = ya.FoodID
JOIN yield_name yn ON ya.YieldID = yn.YieldID
"""

pd.read_sql_query(query, connection)

Unnamed: 0,FoodDescription,FoodGroupName,YieldAmount,YieldDescription
0,"Dessert topping (non dairy), powdered",Dairy and Egg Products,22,amount to make 250ml
1,"Dessert topping (non dairy), powdered",Dairy and Egg Products,1,amount to make 15ml
2,"Milk, dry whole",Dairy and Egg Products,33,amount to make 250ml
3,"Milk, dry, buttermilk, sweet cream",Dairy and Egg Products,25,amount to make 250ml
4,"Milk, evaporated, skim, canned, undiluted, 0.2...",Dairy and Egg Products,133,amount to make 250ml
...,...,...,...,...
1421,"Soup, cream, mushroom, canned, condensed, redu...","Soups, Sauces and Gravies",132,amount to make 250ml
1422,"Cereal, hot, cream of wheat, instant: whole gr...",Breakfast cereals,27,amount to make 175ml cooked
1423,"Orange juice, frozen concentrate, undiluted, w...",Fruits and fruit juices,75,amount to make 250ml
1424,"Soup, broth, chicken, canned, condensed, reduc...","Soups, Sauces and Gravies",132,amount to make 250ml


# Database exploration

In [4]:
# Foods with their food group, yield amount and yield description.
# NOTE(review): this repeats the query of the previous code cell (In [3]);
# consider keeping only one of the two.
# Tables are joined with one explicit JOIN ... ON each, so every condition
# sits next to the table it introduces; comma-separated tables sharing one
# trailing ON clause are not standard SQL.
query = """
SELECT f.FoodDescription, fg.FoodGroupName, ya.YieldAmount, yn.YieldDescription
FROM food_name f
JOIN food_group fg ON f.FoodGroupID = fg.FoodGroupId
JOIN yield_amount ya ON f.FoodID = ya.FoodID
JOIN yield_name yn ON ya.YieldID = yn.YieldID;
"""

pd.read_sql_query(query, connection)

Unnamed: 0,FoodDescription,FoodGroupName,YieldAmount,YieldDescription
0,"Dessert topping (non dairy), powdered",Dairy and Egg Products,22,amount to make 250ml
1,"Dessert topping (non dairy), powdered",Dairy and Egg Products,1,amount to make 15ml
2,"Milk, dry whole",Dairy and Egg Products,33,amount to make 250ml
3,"Milk, dry, buttermilk, sweet cream",Dairy and Egg Products,25,amount to make 250ml
4,"Milk, evaporated, skim, canned, undiluted, 0.2...",Dairy and Egg Products,133,amount to make 250ml
...,...,...,...,...
1421,"Soup, cream, mushroom, canned, condensed, redu...","Soups, Sauces and Gravies",132,amount to make 250ml
1422,"Cereal, hot, cream of wheat, instant: whole gr...",Breakfast cereals,27,amount to make 175ml cooked
1423,"Orange juice, frozen concentrate, undiluted, w...",Fruits and fruit juices,75,amount to make 250ml
1424,"Soup, broth, chicken, canned, condensed, reduc...","Soups, Sauces and Gravies",132,amount to make 250ml


In [5]:
# Foods with each of their conversion factors and the description of the
# measure the factor applies to.
# Each table is attached with its own JOIN ... ON; comma-separated tables
# sharing one trailing ON clause are not standard SQL and hide which
# condition links which pair of tables.
query = """
SELECT f.FoodDescription, cf.ConversionFactorValue, m.MeasureDescription
FROM food_name f
JOIN conversion_factor cf ON f.FoodID = cf.FoodID
JOIN measure_name m ON cf.MeasureID = m.MeasureID;
"""

pd.read_sql_query(query, connection)

Unnamed: 0,FoodDescription,ConversionFactorValue,MeasureDescription
0,Cheese souffle,0.40152,100ml
1,Cheese souffle,0.50190,125ml
2,Cheese souffle,1.00380,250ml
3,"Chop suey, with meat, canned",0.92984,100ml
4,"Chop suey, with meat, canned",1.16230,125ml
...,...,...,...
19354,"Chocolate syrup, thin type, less sugar, with a...",1.18400,100ml
19355,"Chocolate syrup, thin type, less sugar, with a...",0.17760,15ml
19356,"Chocolate syrup, thin type, less sugar, with a...",0.35520,30ml
19357,"Granola bar, chewy, high fibre, oats and choco...",0.40000,1 bar


In [6]:
# Every nutrient value of every food, labelled with the food's group and the
# nutrient's name (the saved output shows roughly 525k rows).
# Each table is attached with its own JOIN ... ON; comma-separated tables
# sharing one trailing ON clause are not standard SQL and hide which
# condition links which pair of tables.
query = """
SELECT
fg.FoodGroupName, f.FoodDescription, nn.NutrientName, na.NutrientValue
FROM food_group fg
JOIN food_name f ON fg.FoodGroupId = f.FoodGroupId
JOIN nutrient_amount na ON f.FoodID = na.FoodID
JOIN nutrient_name nn ON na.NutrientID = nn.NutrientId;
"""

pd.read_sql_query(query, connection)

Unnamed: 0,FoodGroupName,FoodDescription,NutrientName,NutrientValue
0,Dairy and Egg Products,"Butter, whipped",PROTEIN,0.850
1,Dairy and Egg Products,"Butter, whipped",FAT (TOTAL LIPIDS),81.110
2,Dairy and Egg Products,"Butter, whipped","CARBOHYDRATE, TOTAL (BY DIFFERENCE)",0.060
3,Dairy and Egg Products,"Butter, whipped","ASH, TOTAL",2.110
4,Dairy and Egg Products,"Butter, whipped",ENERGY (KILOCALORIES),717.000
...,...,...,...,...
524668,Snacks,"Snacks, potato chips, lightly salted","FATTY ACIDS, POLYUNSATURATED, 20:3 n-3 EICOSAT...",0.002
524669,Snacks,"Snacks, potato chips, lightly salted","CHOLINE, TOTAL",12.100
524670,Snacks,"Snacks, potato chips, lightly salted",BETAINE,0.200
524671,Snacks,"Snacks, potato chips, lightly salted","VITAMIN B12, ADDED",0.000


In [7]:
# List the food groups: one row per distinct FoodGroupName in food_group.
query = """
SELECT DISTINCT FoodGroupName
FROM food_group;
"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,FoodGroupName
0,Dairy and Egg Products
1,Spices and Herbs
2,Babyfoods
3,Fats and Oils
4,Poultry Products
5,"Soups, Sauces and Gravies"
6,Sausages and Luncheon meats
7,Breakfast cereals
8,Fruits and fruit juices
9,Pork Products


# Food & Food groups exploration

In [14]:
# Overview of the nutrients tracked by the database: one row per distinct
# NutrientName in nutrient_name.
query = """
SELECT DISTINCT NutrientName
FROM nutrient_name
"""

pd.read_sql_query(sql=query, con=connection)

Unnamed: 0,NutrientName
0,PROTEIN
1,FAT (TOTAL LIPIDS)
2,"CARBOHYDRATE, TOTAL (BY DIFFERENCE)"
3,"ASH, TOTAL"
4,ENERGY (KILOCALORIES)
...,...
147,"FATTY ACIDS, MONOUNSATURATED, 12:1, LAUROLEIC"
148,"FATTY ACIDS, POLYUNSATURATED, 22:3,"
149,"FATTY ACIDS, POLYUNSATURATED, 22:2, DOCOSADIENOIC"
150,"FATTY ACIDS, POLYUNSATURATED, TOTAL OMEGA N-3"


In [42]:
# Top 20 foods by protein: the 20 highest NutrientValue rows for the nutrient
# whose name is bound to the "?" placeholder.
# DISTINCT applies to the whole (description, value, nutrient) row.
# Each table is attached with its own JOIN ... ON; comma-separated tables
# sharing one trailing ON clause are not standard SQL.
query = """
SELECT DISTINCT f.FoodDescription, na.NutrientValue, nn.NutrientName
FROM food_name f
JOIN nutrient_amount na ON f.FoodID = na.FoodID
JOIN nutrient_name nn ON na.NutrientID = nn.NutrientID
WHERE nn.NutrientName = ?
ORDER BY na.NutrientValue DESC
LIMIT 20
"""

# The nutrient name is passed as a bound parameter rather than being
# formatted into the SQL text.
params = ("PROTEIN",)
pd.read_sql_query(query, connection, params=params)

Unnamed: 0,FoodDescription,NutrientValue,NutrientName
0,"Sweets, gelatin, dry powder, unsweetened",85.6,PROTEIN
1,"Game meat, native, bearded seal, (oogruk), air...",82.6,PROTEIN
2,"Egg, chicken, white, dried, powder, glucose re...",82.4,PROTEIN
3,"Egg, chicken, white, dried",81.1,PROTEIN
4,Soy protein isolate (prepared with sodium),80.69,PROTEIN
5,Soy protein isolate (prepared with potassium),80.69,PROTEIN
6,"Game meat, native, moose, dried",79.5,PROTEIN
7,"Beverage, protein powder, whey based, powder",78.13,PROTEIN
8,"Fish, steelhead trout, meat, dried",77.27,PROTEIN
9,"Game meat, native, narwhal, meat, dried",77.0,PROTEIN


In [46]:
# Average protein value per food group, highest average first.
# - Tables are attached with one explicit JOIN ... ON each; comma-separated
#   tables sharing one trailing ON clause are not standard SQL.
# - Column aliases are double-quoted identifiers. Single quotes denote string
#   literals in SQL; SQLite only accepts them as aliases for compatibility.
# - FoodGroupName and NutrientName are selected without appearing in GROUP BY,
#   which relies on SQLite's handling of bare columns in aggregate queries
#   (NutrientName is fixed by the WHERE clause anyway).
query = """
SELECT fg.FoodGroupName AS "Food Group", nn.NutrientName AS "Nutrient", AVG(na.NutrientValue) AS "Group average"
FROM food_group fg
JOIN food_name f ON fg.FoodGroupID = f.FoodGroupID
JOIN nutrient_amount na ON f.FoodID = na.FoodID
JOIN nutrient_name nn ON na.NutrientID = nn.NutrientID
WHERE nn.NutrientName = 'PROTEIN'
GROUP BY fg.FoodGroupID
ORDER BY AVG(na.NutrientValue) DESC
"""

pd.read_sql_query(query, connection)

Unnamed: 0,Food Group,Nutrient,Group average
0,Beef Products,PROTEIN,25.417824
1,Poultry Products,PROTEIN,24.02878
2,"Lamb, Veal and Game",PROTEIN,23.333636
3,Pork Products,PROTEIN,22.663206
4,Finfish and Shellfish Products,PROTEIN,21.264154
5,Sausages and Luncheon meats,PROTEIN,16.306242
6,Legumes and Legume Products,PROTEIN,15.670478
7,Nuts and Seeds,PROTEIN,15.444892
8,Dairy and Egg Products,PROTEIN,12.61083
9,Fast Foods,PROTEIN,11.182126
