In [1]:
# Import necessary packages
from matplotlib.axes._axes import _log as matplotlib_axes_logger
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import earthpy as et
import numpy as np

# Register pandas' datetime converters with matplotlib so date-like
# values can be plotted directly
from pandas.plotting import register_matplotlib_converters
import warnings

register_matplotlib_converters()

# Keep the notebook output quiet: Python warnings are ignored globally and
# matplotlib's axes logger only reports ERROR and above (one of the plots
# otherwise triggers a logged message)
warnings.filterwarnings(action='ignore')
matplotlib_axes_logger.setLevel('ERROR')

# Notebook-wide seaborn look: white grid background, fonts scaled by 1.5
sns.set(style="whitegrid", font_scale=1.5)

# Set working directory to the food-database-uk folder under earth-analytics
# (kept as a process-wide chdir because relative paths below depend on it)
os.chdir(os.path.join(et.io.HOME, "earth-analytics", "food-database-uk"))

# Input file, relative to the working directory set above:
#   food-database-uk/1.2_factors.csv
# (an alternative name, factors_1.2.csv, was noted here previously)
fname = "1.2_factors.csv"

# Lift pandas' display truncation so wide frames show every column.
# NOTE: max_rows=None means any bare DataFrame at the end of a cell is
# rendered in full, so previews should go through .head().
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Read the table with pandas' default parsing; no date columns are parsed
# and the default integer index is kept
factors_fooddb_uk = pd.read_csv(fname)

# Preview the first rows only instead of rendering the whole table
factors_fooddb_uk.head(10)

Unnamed: 0,Food Code,Food Name,Description,Group,Previous,Main data references,Footnote,Edible proportion,Specific gravity,Total solids,Nitrogen conversion factor,Glycerol conversion factor
0,,,,,,,,,,,,
1,,,,,,,,,,,,
2,13-145,"Ackee, canned, drained",8 cans,DG,554,"MW4, 1978; and Vegetables, Herbs and Spices Su...",,N,,,,0.8
3,13-146,"Agar, dried",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,1.00,,,,
4,13-147,"Agar, dried, soaked and drained",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,1.00,,,,
5,13-148,"Alfalfa sprouts, raw",Analytical and literature sources,DG,,"Vegetables, Herbs and Spices Supplement, 1991",,1.00,,,,
6,13-801,"Allspice, ground",Literature sources,H,,Marsh et al. (1977) Composition of foods: spic...,,1.00,,,,
7,14-870,"Almonds, flaked and ground",10 samples,GA,14-801 14-855 50-972,"Reviewed 2013. LGC, Snacks and nuts, 1989-1991...",,1.00,,,5.18,0.956
8,14-897,"Almonds, toasted",Literature sources,GA,14-803,"USDA SR28, 2015",,1.00,,,5.18,0.956
9,14-898,"Almonds, weighed with shells",Calculated from 14-896,GA,14-883,"USDA SR28, 2015",,0.37,,,5.18,0.956


In [7]:
# Show the column labels of the loaded table as a plain Python list
print(factors_fooddb_uk.columns.to_list())

['Food Code', 'Food Name', 'Description', 'Group', 'Previous', 'Main data references', 'Footnote', 'Edible proportion', 'Specific gravity', 'Total solids', 'Nitrogen conversion factor', 'Glycerol conversion factor']


In [8]:
# Summarise the loaded table: index range, per-column non-null counts and
# dtypes, and memory usage (printed by pandas, nothing is returned)
factors_fooddb_uk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2889 entries, 0 to 2888
Data columns (total 12 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Food Code                   2887 non-null   object 
 1   Food Name                   2887 non-null   object 
 2   Description                 2887 non-null   object 
 3   Group                       2887 non-null   object 
 4   Previous                    1867 non-null   object 
 5   Main data references        2886 non-null   object 
 6   Footnote                    12 non-null     object 
 7   Edible proportion           2887 non-null   object 
 8   Specific gravity            54 non-null     float64
 9   Total solids                14 non-null     object 
 10  Nitrogen conversion factor  1277 non-null   float64
 11  Glycerol conversion factor  956 non-null    float64
dtypes: float64(3), object(9)
memory usage: 271.0+ KB


In [3]:
# Keep only the identifying columns (code, name, group) together with the
# edible proportion and the two conversion-factor columns
factors_fooddb_uk_select = factors_fooddb_uk[
    [
        "Food Code",
        "Food Name",
        "Group",
        "Edible proportion",
        "Nitrogen conversion factor",
        "Glycerol conversion factor",
    ]
]

# Preview the first rows only; display.max_rows is unlimited in this
# notebook, so a bare frame here would render every row
factors_fooddb_uk_select.head(10)

Unnamed: 0,Food Code,Food Name,Group,Edible proportion,Nitrogen conversion factor,Glycerol conversion factor
0,,,,,,
1,,,,,,
2,13-145,"Ackee, canned, drained",DG,N,,0.8
3,13-146,"Agar, dried",DG,1.00,,
4,13-147,"Agar, dried, soaked and drained",DG,1.00,,
5,13-148,"Alfalfa sprouts, raw",DG,1.00,,
6,13-801,"Allspice, ground",H,1.00,,
7,14-870,"Almonds, flaked and ground",GA,1.00,5.18,0.956
8,14-897,"Almonds, toasted",GA,1.00,5.18,0.956
9,14-898,"Almonds, weighed with shells",GA,0.37,5.18,0.956


In [4]:
# Discard rows that have no "Food Code" (the blank rows at the top of the
# table), then show the first five remaining rows
factors_fooddb_uk_select = factors_fooddb_uk_select.dropna(
    axis=0,
    how="any",
    subset=["Food Code"],
)
factors_fooddb_uk_select.head(5)

Unnamed: 0,Food Code,Food Name,Group,Edible proportion,Nitrogen conversion factor,Glycerol conversion factor
2,13-145,"Ackee, canned, drained",DG,N,,0.8
3,13-146,"Agar, dried",DG,1.00,,
4,13-147,"Agar, dried, soaked and drained",DG,1.00,,
5,13-148,"Alfalfa sprouts, raw",DG,1.00,,
6,13-801,"Allspice, ground",H,1.00,,


In [5]:
# Preview the selected columns; limited to the first rows because
# display.max_rows is unlimited and a bare frame would render every row
factors_fooddb_uk_select.head(10)

Unnamed: 0,Food Code,Food Name,Group,Edible proportion,Nitrogen conversion factor,Glycerol conversion factor
2,13-145,"Ackee, canned, drained",DG,N,,0.8
3,13-146,"Agar, dried",DG,1.00,,
4,13-147,"Agar, dried, soaked and drained",DG,1.00,,
5,13-148,"Alfalfa sprouts, raw",DG,1.00,,
6,13-801,"Allspice, ground",H,1.00,,
7,14-870,"Almonds, flaked and ground",GA,1.00,5.18,0.956
8,14-897,"Almonds, toasted",GA,1.00,5.18,0.956
9,14-898,"Almonds, weighed with shells",GA,0.37,5.18,0.956
10,14-896,"Almonds, whole kernels",GA,1.00,5.18,0.956
11,13-150,"Amaranth leaves, boiled in unsalted water",DG,1.00,,


In [6]:
# Number of rows currently held in the selected-columns table
factors_fooddb_uk_select.shape[0]

2887

In [9]:
# Build a boolean mask with the same shape as the selected table:
# True where a cell is missing (NaN), False otherwise. Nothing is filtered
# or extracted here; the mask only marks where the gaps are.
factors_fooddb_uk_select_nan = factors_fooddb_uk_select.isnull()

# Preview the first rows of the mask rather than rendering all of it
factors_fooddb_uk_select_nan.head(10)

Unnamed: 0,Food Code,Food Name,Group,Edible proportion,Nitrogen conversion factor,Glycerol conversion factor
2,False,False,False,False,True,False
3,False,False,False,False,True,True
4,False,False,False,False,True,True
5,False,False,False,False,True,True
6,False,False,False,False,True,True
7,False,False,False,False,False,False
8,False,False,False,False,False,False
9,False,False,False,False,False,False
10,False,False,False,False,False,False
11,False,False,False,False,True,True
